From 035c87ead01aa8397b1b205afb10e61a9357c9ac Mon Sep 17 00:00:00 2001 From: Daniel Nezamabadi <55559979+dnezam@users.noreply.github.com> Date: Fri, 5 Dec 2025 22:18:16 +0100 Subject: [PATCH 01/42] Sketch new mlsexp theory --- basis/pure/mlsexpScript.sml | 129 ++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 basis/pure/mlsexpScript.sml diff --git a/basis/pure/mlsexpScript.sml b/basis/pure/mlsexpScript.sml new file mode 100644 index 0000000000..a548f3070f --- /dev/null +++ b/basis/pure/mlsexpScript.sml @@ -0,0 +1,129 @@ +(* + Definitions to lex and parse S-expressions. +*) +Theory mlsexp +Ancestors + mlstring +Libs + preamble + +Datatype: + token = OPEN | CLOSE | SYMBOL mlstring +End + +Datatype: + sexp = Atom mlstring | Expr (sexp list) +End + +Definition read_quoted_aux_def: + read_quoted_aux [] acc = NONE ∧ + read_quoted_aux (#"\""::rest) acc = + SOME ((REVERSE acc), rest) ∧ + read_quoted_aux (#"\\"::#"\""::rest) acc = + read_quoted_aux rest (#"\""::acc) ∧ + read_quoted_aux (#"\\"::#"\\"::rest) acc = + read_quoted_aux rest (#"\\"::acc) ∧ + read_quoted_aux (c::cs) acc = + read_quoted_aux cs (c::acc) +End + +(* Returns the string until a closing quote, and the rest of the input. + Fails if closing quotes are missing. *) +Definition read_quoted_def: + read_quoted (input: string) : (string # string) option = + read_quoted_aux input [] +End + +Theorem read_quoted_aux_length: + ∀input acc. + read_quoted_aux input acc = SOME (s, rest) ⇒ LENGTH rest ≤ LENGTH input +Proof + ho_match_mp_tac read_quoted_aux_ind \\ rw[] + \\ pop_assum mp_tac + \\ once_rewrite_tac[read_quoted_aux_def] + \\ EVERY_CASE_TAC + \\ rpt strip_tac \\ res_tac \\ gvs[] +QED + +Theorem read_quoted_length: + read_quoted input = SOME (s, rest) ⇒ LENGTH rest ≤ LENGTH input +Proof + rw[read_quoted_def] \\ imp_res_tac read_quoted_aux_length +QED + +Definition read_atom_aux_def: + read_atom_aux [] acc = + ((REVERSE acc), []) ∧ + read_atom_aux (c::cs) acc = + if MEM c ") \t\n" then ((REVERSE acc), (c::cs)) + else read_atom_aux cs (c::acc) +End + +(* Returns the string until a closing parenthesis or whitespace, and the + rest of the input. *) +Definition read_atom_def: + read_atom (input: string) : (string # string) = + read_atom_aux input [] +End + +Theorem read_atom_aux_length: + ∀input acc. + read_atom_aux input acc = (s, rest) ⇒ LENGTH rest ≤ LENGTH input +Proof + Induct + \\ simp[read_atom_aux_def] + \\ rw[read_atom_aux_def] \\ res_tac \\ gvs[] +QED + +(* Where possible, we return our progress, and the rest of the input. + INL and INR indicate failure and success, respectively. *) +Type result[local,pp] = “:α + α” + +(* By tracking the depth, we can ensure we only lex one S-expression at a + time. *) +Definition lex_aux_def: + lex_aux depth [] acc : (token list # string) result = + (if depth = 0 then INR (acc, []) else INL (acc, [])) ∧ + lex_aux depth (c::cs) acc = + if MEM c " \t\n" then lex_aux depth cs acc + else if c = #"(" then lex_aux (depth + 1) cs (OPEN::acc) + else if c = #")" then + (if 0 < depth + then lex_aux (depth - 1) cs (CLOSE::acc) + else INL (acc, c::cs)) + else if c = #"\"" then + case read_quoted cs of + | NONE => INL (acc, c::cs) + | SOME (s, rest) => + lex_aux depth rest ((SYMBOL (implode s))::acc) + else + case read_atom (c::cs) of + | (s, rest) => + lex_aux depth rest ((SYMBOL (implode s))::acc) +Termination + wf_rel_tac ‘measure $ (λx. case x of (_, input, _) => LENGTH input)’ \\ rw[] + \\ imp_res_tac read_quoted_length \\ fs[] + \\ pop_assum mp_tac + \\ simp[read_atom_def] + \\ simp[Once read_atom_aux_def] + \\ strip_tac + \\ imp_res_tac read_atom_aux_length \\ fs[] +End + +(* Tokenizes (at most) one S-expression, and returns the rest of the input. *) +Definition lex_def: + lex (input: string) : (token list # string) result = + lex_aux 0 input [] +End + +(* TODO Something feels off about this... are we correctly dealing with + failures? *) +Definition parse_aux_def: + parse_aux [] xs s = xs ∧ + parse_aux (CLOSE :: rest) xs s = parse_aux rest [] (xs::s) ∧ + parse_aux (OPEN :: rest) xs s = + (case s of + | [] => parse_aux rest xs s + | (y::ys) => parse_aux rest ((Expr xs)::y) ys) ∧ + parse_aux ((SYMBOL sy) :: rest) xs s = parse_aux rest ((Atom sy)::xs) s +End From 16ac442c9bfdee65dfc804350fd394309e4552a9 Mon Sep 17 00:00:00 2001 From: Magnus Myreen Date: Fri, 5 Dec 2025 22:58:24 +0100 Subject: [PATCH 02/42] Tweak a few things --- basis/pure/README.md | 4 ++++ basis/pure/mlsexpScript.sml | 31 ++++++++++++++++++++----------- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/basis/pure/README.md b/basis/pure/README.md index 93cefdd73c..fc28352607 100644 --- a/basis/pure/README.md +++ b/basis/pure/README.md @@ -35,6 +35,10 @@ Pure functions for the Rat module. Pure functions for the Set module. This file defines a wrapper around the balanced_map type. +[mlsexpScript.sml](mlsexpScript.sml): +Definition of a simple mlstring-based s-expression, incldues +parsing and pretty printing for these s-expressions. + [mlstringLib.sml](mlstringLib.sml): More ML functions for manipulating HOL terms involving mlstrings. diff --git a/basis/pure/mlsexpScript.sml b/basis/pure/mlsexpScript.sml index a548f3070f..a1aac98a81 100644 --- a/basis/pure/mlsexpScript.sml +++ b/basis/pure/mlsexpScript.sml @@ -1,5 +1,6 @@ (* - Definitions to lex and parse S-expressions. + Definition of a simple mlstring-based s-expression, incldues + parsing and pretty printing for these s-expressions. *) Theory mlsexp Ancestors @@ -7,18 +8,26 @@ Ancestors Libs preamble +(*--------------------------------------------------------------* + Definition of s-expression + *--------------------------------------------------------------*) + Datatype: - token = OPEN | CLOSE | SYMBOL mlstring + sexp = Atom mlstring | Expr (sexp list) End +(*--------------------------------------------------------------* + Lexing + parsing + *--------------------------------------------------------------*) + Datatype: - sexp = Atom mlstring | Expr (sexp list) + token = OPEN | CLOSE | SYMBOL mlstring End Definition read_quoted_aux_def: read_quoted_aux [] acc = NONE ∧ read_quoted_aux (#"\""::rest) acc = - SOME ((REVERSE acc), rest) ∧ + SOME (implode (REVERSE acc), rest) ∧ read_quoted_aux (#"\\"::#"\""::rest) acc = read_quoted_aux rest (#"\""::acc) ∧ read_quoted_aux (#"\\"::#"\\"::rest) acc = @@ -30,7 +39,7 @@ End (* Returns the string until a closing quote, and the rest of the input. Fails if closing quotes are missing. *) Definition read_quoted_def: - read_quoted (input: string) : (string # string) option = + read_quoted (input: string) : (mlstring # string) option = read_quoted_aux input [] End @@ -53,16 +62,16 @@ QED Definition read_atom_aux_def: read_atom_aux [] acc = - ((REVERSE acc), []) ∧ + (implode (REVERSE acc), []) ∧ read_atom_aux (c::cs) acc = - if MEM c ") \t\n" then ((REVERSE acc), (c::cs)) + if MEM c ") \t\n" then (implode (REVERSE acc), (c::cs)) else read_atom_aux cs (c::acc) End (* Returns the string until a closing parenthesis or whitespace, and the rest of the input. *) Definition read_atom_def: - read_atom (input: string) : (string # string) = + read_atom (input: string) : (mlstring # string) = read_atom_aux input [] End @@ -83,7 +92,7 @@ Type result[local,pp] = “:α + α” time. *) Definition lex_aux_def: lex_aux depth [] acc : (token list # string) result = - (if depth = 0 then INR (acc, []) else INL (acc, [])) ∧ + (if depth = 0:num then INR (acc, []) else INL (acc, [])) ∧ lex_aux depth (c::cs) acc = if MEM c " \t\n" then lex_aux depth cs acc else if c = #"(" then lex_aux (depth + 1) cs (OPEN::acc) @@ -95,11 +104,11 @@ Definition lex_aux_def: case read_quoted cs of | NONE => INL (acc, c::cs) | SOME (s, rest) => - lex_aux depth rest ((SYMBOL (implode s))::acc) + lex_aux depth rest ((SYMBOL s)::acc) else case read_atom (c::cs) of | (s, rest) => - lex_aux depth rest ((SYMBOL (implode s))::acc) + lex_aux depth rest ((SYMBOL s)::acc) Termination wf_rel_tac ‘measure $ (λx. case x of (_, input, _) => LENGTH input)’ \\ rw[] \\ imp_res_tac read_quoted_length \\ fs[] From 514b3a757aa560aebe7814ac1931bfcda8d52b78 Mon Sep 17 00:00:00 2001 From: Magnus Myreen Date: Fri, 5 Dec 2025 23:27:54 +0100 Subject: [PATCH 03/42] Tweaks + add some tests --- basis/pure/mlsexpScript.sml | 45 ++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/basis/pure/mlsexpScript.sml b/basis/pure/mlsexpScript.sml index a1aac98a81..46b5f705bd 100644 --- a/basis/pure/mlsexpScript.sml +++ b/basis/pure/mlsexpScript.sml @@ -97,25 +97,26 @@ Definition lex_aux_def: if MEM c " \t\n" then lex_aux depth cs acc else if c = #"(" then lex_aux (depth + 1) cs (OPEN::acc) else if c = #")" then - (if 0 < depth - then lex_aux (depth - 1) cs (CLOSE::acc) - else INL (acc, c::cs)) + (if depth = 0 then INL (ARB, c::cs) + else if depth = 1 then INR (CLOSE::acc, cs) + else lex_aux (depth - 1) cs (CLOSE::acc)) else if c = #"\"" then case read_quoted cs of | NONE => INL (acc, c::cs) | SOME (s, rest) => - lex_aux depth rest ((SYMBOL s)::acc) + if depth = 0 then INR ([SYMBOL s], rest) + else lex_aux depth rest ((SYMBOL s)::acc) else case read_atom (c::cs) of | (s, rest) => - lex_aux depth rest ((SYMBOL s)::acc) + if depth = 0 then INR ([SYMBOL s], rest) + else lex_aux depth rest ((SYMBOL s)::acc) Termination - wf_rel_tac ‘measure $ (λx. case x of (_, input, _) => LENGTH input)’ \\ rw[] + wf_rel_tac ‘measure $ (λ(_, input, _). LENGTH input)’ \\ rw[] \\ imp_res_tac read_quoted_length \\ fs[] - \\ pop_assum mp_tac - \\ simp[read_atom_def] - \\ simp[Once read_atom_aux_def] - \\ strip_tac + \\ fs[read_atom_def] + \\ fs[Once read_atom_aux_def] + \\ gvs [AllCaseEqs()] \\ imp_res_tac read_atom_aux_length \\ fs[] End @@ -125,6 +126,15 @@ Definition lex_def: lex_aux 0 input [] End +Theorem test_lex[local]: + lex " 1 2 3 " = INR ([SYMBOL «1»]," 2 3 ") ∧ + lex " (1 2) 3 " = INR ([CLOSE; SYMBOL «2»; SYMBOL «1»; OPEN]," 3 ") ∧ + lex " (1 2) ) " = INR ([CLOSE; SYMBOL «2»; SYMBOL «1»; OPEN]," ) ") ∧ + lex " ) (1 2) " = INL (ARB,") (1 2) ") +Proof + EVAL_TAC +QED + (* TODO Something feels off about this... are we correctly dealing with failures? *) Definition parse_aux_def: @@ -136,3 +146,18 @@ Definition parse_aux_def: | (y::ys) => parse_aux rest ((Expr xs)::y) ys) ∧ parse_aux ((SYMBOL sy) :: rest) xs s = parse_aux rest ((Atom sy)::xs) s End + +Definition parse_def: + parse input = + case lex input of + | INL (err, rest) => INL (err, rest) + | INR (rev_toks, rest) => INR (parse_aux rev_toks [] [], rest) +End + +Theorem test_parse[local]: + parse " 1 2 3 " = INR ([Atom «1»]," 2 3 ") ∧ + parse "(1 2 3)" = INR ([Expr [Atom «1»; Atom «2»; Atom «3»]],"") ∧ + parse "(()) ()" = INR ([Expr [Expr []]]," ()") +Proof + EVAL_TAC +QED From 3ad3541fca035eab738fe3df47ece52c697c762f Mon Sep 17 00:00:00 2001 From: Daniel Nezamabadi <55559979+dnezam@users.noreply.github.com> Date: Fri, 5 Dec 2025 23:38:21 +0100 Subject: [PATCH 04/42] Renaming + Fix typo --- basis/pure/mlsexpScript.sml | 74 ++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/basis/pure/mlsexpScript.sml b/basis/pure/mlsexpScript.sml index 46b5f705bd..5dfa005322 100644 --- a/basis/pure/mlsexpScript.sml +++ b/basis/pure/mlsexpScript.sml @@ -1,5 +1,5 @@ (* - Definition of a simple mlstring-based s-expression, incldues + Definition of a simple mlstring-based s-expression, includes parsing and pretty printing for these s-expressions. *) Theory mlsexp @@ -24,64 +24,64 @@ Datatype: token = OPEN | CLOSE | SYMBOL mlstring End -Definition read_quoted_aux_def: - read_quoted_aux [] acc = NONE ∧ - read_quoted_aux (#"\""::rest) acc = +Definition read_string_aux_def: + read_string_aux [] acc = NONE ∧ + read_string_aux (#"\""::rest) acc = SOME (implode (REVERSE acc), rest) ∧ - read_quoted_aux (#"\\"::#"\""::rest) acc = - read_quoted_aux rest (#"\""::acc) ∧ - read_quoted_aux (#"\\"::#"\\"::rest) acc = - read_quoted_aux rest (#"\\"::acc) ∧ - read_quoted_aux (c::cs) acc = - read_quoted_aux cs (c::acc) + read_string_aux (#"\\"::#"\""::rest) acc = + read_string_aux rest (#"\""::acc) ∧ + read_string_aux (#"\\"::#"\\"::rest) acc = + read_string_aux rest (#"\\"::acc) ∧ + read_string_aux (c::cs) acc = + read_string_aux cs (c::acc) End (* Returns the string until a closing quote, and the rest of the input. Fails if closing quotes are missing. *) -Definition read_quoted_def: - read_quoted (input: string) : (mlstring # string) option = - read_quoted_aux input [] +Definition read_string_def: + read_string (input: string) : (mlstring # string) option = + read_string_aux input [] End -Theorem read_quoted_aux_length: +Theorem read_string_aux_length: ∀input acc. - read_quoted_aux input acc = SOME (s, rest) ⇒ LENGTH rest ≤ LENGTH input + read_string_aux input acc = SOME (s, rest) ⇒ LENGTH rest ≤ LENGTH input Proof - ho_match_mp_tac read_quoted_aux_ind \\ rw[] + ho_match_mp_tac read_string_aux_ind \\ rw[] \\ pop_assum mp_tac - \\ once_rewrite_tac[read_quoted_aux_def] + \\ once_rewrite_tac[read_string_aux_def] \\ EVERY_CASE_TAC \\ rpt strip_tac \\ res_tac \\ gvs[] QED -Theorem read_quoted_length: - read_quoted input = SOME (s, rest) ⇒ LENGTH rest ≤ LENGTH input +Theorem read_string_length: + read_string input = SOME (s, rest) ⇒ LENGTH rest ≤ LENGTH input Proof - rw[read_quoted_def] \\ imp_res_tac read_quoted_aux_length + rw[read_string_def] \\ imp_res_tac read_string_aux_length QED -Definition read_atom_aux_def: - read_atom_aux [] acc = +Definition read_symbol_aux_def: + read_symbol_aux [] acc = (implode (REVERSE acc), []) ∧ - read_atom_aux (c::cs) acc = + read_symbol_aux (c::cs) acc = if MEM c ") \t\n" then (implode (REVERSE acc), (c::cs)) - else read_atom_aux cs (c::acc) + else read_symbol_aux cs (c::acc) End (* Returns the string until a closing parenthesis or whitespace, and the rest of the input. *) -Definition read_atom_def: - read_atom (input: string) : (mlstring # string) = - read_atom_aux input [] +Definition read_symbol_def: + read_symbol (input: string) : (mlstring # string) = + read_symbol_aux input [] End -Theorem read_atom_aux_length: +Theorem read_symbol_aux_length: ∀input acc. - read_atom_aux input acc = (s, rest) ⇒ LENGTH rest ≤ LENGTH input + read_symbol_aux input acc = (s, rest) ⇒ LENGTH rest ≤ LENGTH input Proof Induct - \\ simp[read_atom_aux_def] - \\ rw[read_atom_aux_def] \\ res_tac \\ gvs[] + \\ simp[read_symbol_aux_def] + \\ rw[read_symbol_aux_def] \\ res_tac \\ gvs[] QED (* Where possible, we return our progress, and the rest of the input. @@ -101,23 +101,23 @@ Definition lex_aux_def: else if depth = 1 then INR (CLOSE::acc, cs) else lex_aux (depth - 1) cs (CLOSE::acc)) else if c = #"\"" then - case read_quoted cs of + case read_string cs of | NONE => INL (acc, c::cs) | SOME (s, rest) => if depth = 0 then INR ([SYMBOL s], rest) else lex_aux depth rest ((SYMBOL s)::acc) else - case read_atom (c::cs) of + case read_symbol (c::cs) of | (s, rest) => if depth = 0 then INR ([SYMBOL s], rest) else lex_aux depth rest ((SYMBOL s)::acc) Termination wf_rel_tac ‘measure $ (λ(_, input, _). LENGTH input)’ \\ rw[] - \\ imp_res_tac read_quoted_length \\ fs[] - \\ fs[read_atom_def] - \\ fs[Once read_atom_aux_def] + \\ imp_res_tac read_string_length \\ fs[] + \\ fs[read_symbol_def] + \\ fs[Once read_symbol_aux_def] \\ gvs [AllCaseEqs()] - \\ imp_res_tac read_atom_aux_length \\ fs[] + \\ imp_res_tac read_symbol_aux_length \\ fs[] End (* Tokenizes (at most) one S-expression, and returns the rest of the input. *) From a1314c80f42058769585e81e2eeb9afd5564507e Mon Sep 17 00:00:00 2001 From: Daniel Nezamabadi <55559979+dnezam@users.noreply.github.com> Date: Sat, 6 Dec 2025 00:00:49 +0100 Subject: [PATCH 05/42] [mlsexp] Make lexer fail with an error message --- basis/pure/mlsexpScript.sml | 42 +++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/basis/pure/mlsexpScript.sml b/basis/pure/mlsexpScript.sml index 5dfa005322..ed58c32f63 100644 --- a/basis/pure/mlsexpScript.sml +++ b/basis/pure/mlsexpScript.sml @@ -25,9 +25,9 @@ Datatype: End Definition read_string_aux_def: - read_string_aux [] acc = NONE ∧ + read_string_aux [] acc = INL «read_string_aux: unterminated string literal» ∧ read_string_aux (#"\""::rest) acc = - SOME (implode (REVERSE acc), rest) ∧ + INR (implode (REVERSE acc), rest) ∧ read_string_aux (#"\\"::#"\""::rest) acc = read_string_aux rest (#"\""::acc) ∧ read_string_aux (#"\\"::#"\\"::rest) acc = @@ -37,15 +37,15 @@ Definition read_string_aux_def: End (* Returns the string until a closing quote, and the rest of the input. - Fails if closing quotes are missing. *) + Fails with an error message if closing quotes are missing. *) Definition read_string_def: - read_string (input: string) : (mlstring # string) option = + read_string (input: string) : mlstring + (mlstring # string) = read_string_aux input [] End Theorem read_string_aux_length: ∀input acc. - read_string_aux input acc = SOME (s, rest) ⇒ LENGTH rest ≤ LENGTH input + read_string_aux input acc = INR (s, rest) ⇒ LENGTH rest ≤ LENGTH input Proof ho_match_mp_tac read_string_aux_ind \\ rw[] \\ pop_assum mp_tac @@ -55,7 +55,7 @@ Proof QED Theorem read_string_length: - read_string input = SOME (s, rest) ⇒ LENGTH rest ≤ LENGTH input + read_string input = INR (s, rest) ⇒ LENGTH rest ≤ LENGTH input Proof rw[read_string_def] \\ imp_res_tac read_string_aux_length QED @@ -84,26 +84,23 @@ Proof \\ rw[read_symbol_aux_def] \\ res_tac \\ gvs[] QED -(* Where possible, we return our progress, and the rest of the input. - INL and INR indicate failure and success, respectively. *) -Type result[local,pp] = “:α + α” - (* By tracking the depth, we can ensure we only lex one S-expression at a time. *) Definition lex_aux_def: - lex_aux depth [] acc : (token list # string) result = - (if depth = 0:num then INR (acc, []) else INL (acc, [])) ∧ + lex_aux depth [] acc : mlstring + (token list # string) = + (if depth = 0:num then INR (acc, []) + else INL «lex_aux: missing closing parenthesis») ∧ lex_aux depth (c::cs) acc = if MEM c " \t\n" then lex_aux depth cs acc else if c = #"(" then lex_aux (depth + 1) cs (OPEN::acc) else if c = #")" then - (if depth = 0 then INL (ARB, c::cs) + (if depth = 0 then INL «lex_aux: too many closing parenthesis» else if depth = 1 then INR (CLOSE::acc, cs) else lex_aux (depth - 1) cs (CLOSE::acc)) else if c = #"\"" then case read_string cs of - | NONE => INL (acc, c::cs) - | SOME (s, rest) => + | INL msg => INL msg + | INR (s, rest) => if depth = 0 then INR ([SYMBOL s], rest) else lex_aux depth rest ((SYMBOL s)::acc) else @@ -120,9 +117,11 @@ Termination \\ imp_res_tac read_symbol_aux_length \\ fs[] End -(* Tokenizes (at most) one S-expression, and returns the rest of the input. *) +(* Tokenizes (at most) one S-expression, and returns the rest of the input. + Fails if parentheses are not balanced, or if unterminated string literals + are present. *) Definition lex_def: - lex (input: string) : (token list # string) result = + lex (input: string) : mlstring + (token list # string) = lex_aux 0 input [] End @@ -130,7 +129,8 @@ Theorem test_lex[local]: lex " 1 2 3 " = INR ([SYMBOL «1»]," 2 3 ") ∧ lex " (1 2) 3 " = INR ([CLOSE; SYMBOL «2»; SYMBOL «1»; OPEN]," 3 ") ∧ lex " (1 2) ) " = INR ([CLOSE; SYMBOL «2»; SYMBOL «1»; OPEN]," ) ") ∧ - lex " ) (1 2) " = INL (ARB,") (1 2) ") + lex " (1 2 " = INL «lex_aux: missing closing parenthesis» ∧ + lex " ) (1 2) " = INL «lex_aux: too many closing parenthesis» Proof EVAL_TAC QED @@ -147,10 +147,12 @@ Definition parse_aux_def: parse_aux ((SYMBOL sy) :: rest) xs s = parse_aux rest ((Atom sy)::xs) s End +(* Parses (at most) one S-expression, and returns the rest of the input. + Fails exactly when lexing fails. *) Definition parse_def: - parse input = + parse (input: string) : mlstring + (sexp list # string) = case lex input of - | INL (err, rest) => INL (err, rest) + | INL msg => INL msg | INR (rev_toks, rest) => INR (parse_aux rev_toks [] [], rest) End From fa69ec8eeef5a3d6e4b99917a3fe4eb54f0074ec Mon Sep 17 00:00:00 2001 From: Daniel Nezamabadi <55559979+dnezam@users.noreply.github.com> Date: Sat, 6 Dec 2025 00:13:32 +0100 Subject: [PATCH 06/42] [mlsexp] Add unescaping in the lexer --- basis/pure/mlsexpScript.sml | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/basis/pure/mlsexpScript.sml b/basis/pure/mlsexpScript.sml index ed58c32f63..efd89495df 100644 --- a/basis/pure/mlsexpScript.sml +++ b/basis/pure/mlsexpScript.sml @@ -24,14 +24,25 @@ Datatype: token = OPEN | CLOSE | SYMBOL mlstring End +(* In addition to reading until a closing quote, it also unescapes some common + escape characters. *) Definition read_string_aux_def: - read_string_aux [] acc = INL «read_string_aux: unterminated string literal» ∧ + read_string_aux [] acc = + INL «read_string_aux: unterminated string literal» ∧ read_string_aux (#"\""::rest) acc = INR (implode (REVERSE acc), rest) ∧ read_string_aux (#"\\"::#"\""::rest) acc = read_string_aux rest (#"\""::acc) ∧ read_string_aux (#"\\"::#"\\"::rest) acc = read_string_aux rest (#"\\"::acc) ∧ + read_string_aux (#"\\"::#"0"::rest) acc = + read_string_aux rest (#"\000"::acc) ∧ + read_string_aux (#"\\"::#"n"::rest) acc = + read_string_aux rest (#"\n"::acc) ∧ + read_string_aux (#"\\"::#"r"::rest) acc = + read_string_aux rest (#"\r"::acc) ∧ + read_string_aux (#"\\"::#"t"::rest) acc = + read_string_aux rest (#"\t"::acc) ∧ read_string_aux (c::cs) acc = read_string_aux cs (c::acc) End @@ -127,6 +138,7 @@ End Theorem test_lex[local]: lex " 1 2 3 " = INR ([SYMBOL «1»]," 2 3 ") ∧ + lex "\"\n \" 2" = INR ([SYMBOL «\n »]," 2") ∧ lex " (1 2) 3 " = INR ([CLOSE; SYMBOL «2»; SYMBOL «1»; OPEN]," 3 ") ∧ lex " (1 2) ) " = INR ([CLOSE; SYMBOL «2»; SYMBOL «1»; OPEN]," ) ") ∧ lex " (1 2 " = INL «lex_aux: missing closing parenthesis» ∧ From 022e8e519f278f758a376ee44f0741d32d71f851 Mon Sep 17 00:00:00 2001 From: Daniel Nezamabadi <55559979+dnezam@users.noreply.github.com> Date: Sat, 6 Dec 2025 00:15:29 +0100 Subject: [PATCH 07/42] [mlsexp] Syntactically clean up parse_aux_def --- basis/pure/mlsexpScript.sml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/basis/pure/mlsexpScript.sml b/basis/pure/mlsexpScript.sml index efd89495df..7c318c4597 100644 --- a/basis/pure/mlsexpScript.sml +++ b/basis/pure/mlsexpScript.sml @@ -147,16 +147,14 @@ Proof EVAL_TAC QED -(* TODO Something feels off about this... are we correctly dealing with - failures? *) Definition parse_aux_def: parse_aux [] xs s = xs ∧ - parse_aux (CLOSE :: rest) xs s = parse_aux rest [] (xs::s) ∧ - parse_aux (OPEN :: rest) xs s = + parse_aux (CLOSE::rest) xs s = parse_aux rest [] (xs::s) ∧ + parse_aux (OPEN::rest) xs s = (case s of | [] => parse_aux rest xs s | (y::ys) => parse_aux rest ((Expr xs)::y) ys) ∧ - parse_aux ((SYMBOL sy) :: rest) xs s = parse_aux rest ((Atom sy)::xs) s + parse_aux ((SYMBOL sy)::rest) xs s = parse_aux rest ((Atom sy)::xs) s End (* Parses (at most) one S-expression, and returns the rest of the input. From 301992cb4b650fd279c6409df626c7ceb5197006 Mon Sep 17 00:00:00 2001 From: Magnus Myreen Date: Sat, 6 Dec 2025 12:46:42 +0100 Subject: [PATCH 08/42] Tidy up read_string_aux --- basis/pure/README.md | 2 +- basis/pure/mlsexpScript.sml | 30 ++++++++++++++---------------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/basis/pure/README.md b/basis/pure/README.md index fc28352607..6ab6bc85af 100644 --- a/basis/pure/README.md +++ b/basis/pure/README.md @@ -36,7 +36,7 @@ Pure functions for the Set module. This file defines a wrapper around the balanced_map type. [mlsexpScript.sml](mlsexpScript.sml): -Definition of a simple mlstring-based s-expression, incldues +Definition of a simple mlstring-based s-expression, includes parsing and pretty printing for these s-expressions. [mlstringLib.sml](mlstringLib.sml): diff --git a/basis/pure/mlsexpScript.sml b/basis/pure/mlsexpScript.sml index 7c318c4597..4c1dd389da 100644 --- a/basis/pure/mlsexpScript.sml +++ b/basis/pure/mlsexpScript.sml @@ -29,22 +29,20 @@ End Definition read_string_aux_def: read_string_aux [] acc = INL «read_string_aux: unterminated string literal» ∧ - read_string_aux (#"\""::rest) acc = - INR (implode (REVERSE acc), rest) ∧ - read_string_aux (#"\\"::#"\""::rest) acc = - read_string_aux rest (#"\""::acc) ∧ - read_string_aux (#"\\"::#"\\"::rest) acc = - read_string_aux rest (#"\\"::acc) ∧ - read_string_aux (#"\\"::#"0"::rest) acc = - read_string_aux rest (#"\000"::acc) ∧ - read_string_aux (#"\\"::#"n"::rest) acc = - read_string_aux rest (#"\n"::acc) ∧ - read_string_aux (#"\\"::#"r"::rest) acc = - read_string_aux rest (#"\r"::acc) ∧ - read_string_aux (#"\\"::#"t"::rest) acc = - read_string_aux rest (#"\t"::acc) ∧ - read_string_aux (c::cs) acc = - read_string_aux cs (c::acc) + read_string_aux (c::rest) acc = + if c = #"\"" then INR (implode (REVERSE acc), rest) else + if c = #"\\" then + (case rest of + | [] => read_string_aux rest acc (* causes error: unterminated string *) + | (e::rest) => + (if e = #"\\" then read_string_aux rest (#"\""::acc) else + if e = #"0" then read_string_aux rest (#"\000"::acc) else + if e = #"n" then read_string_aux rest (#"\n"::acc) else + if e = #"r" then read_string_aux rest (#"\""::acc) else + if e = #"t" then read_string_aux rest (#"\""::acc) else + INL «read_string_aux: unrecognised escape»)) + else + read_string_aux rest (c::acc) End (* Returns the string until a closing quote, and the rest of the input. From 8c5a12c0f614dc76698e9294633d255770aff2ea Mon Sep 17 00:00:00 2001 From: Daniel Nezamabadi <55559979+dnezam@users.noreply.github.com> Date: Sat, 6 Dec 2025 12:54:40 +0100 Subject: [PATCH 09/42] [mlsexp] Tidy up some comments --- basis/pure/mlsexpScript.sml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/basis/pure/mlsexpScript.sml b/basis/pure/mlsexpScript.sml index 4c1dd389da..cdaf6c9ec5 100644 --- a/basis/pure/mlsexpScript.sml +++ b/basis/pure/mlsexpScript.sml @@ -24,8 +24,6 @@ Datatype: token = OPEN | CLOSE | SYMBOL mlstring End -(* In addition to reading until a closing quote, it also unescapes some common - escape characters. *) Definition read_string_aux_def: read_string_aux [] acc = INL «read_string_aux: unterminated string literal» ∧ @@ -46,7 +44,8 @@ Definition read_string_aux_def: End (* Returns the string until a closing quote, and the rest of the input. - Fails with an error message if closing quotes are missing. *) + Fails with an error message if closing quotes are missing or an + unrecognised escape sequence occurs. *) Definition read_string_def: read_string (input: string) : mlstring + (mlstring # string) = read_string_aux input [] @@ -127,8 +126,8 @@ Termination End (* Tokenizes (at most) one S-expression, and returns the rest of the input. - Fails if parentheses are not balanced, or if unterminated string literals - are present. *) + Fails with an error message if parentheses are unbalanced or + read_string fails on a string literal. *) Definition lex_def: lex (input: string) : mlstring + (token list # string) = lex_aux 0 input [] From 24e7942d853cb2db0682e52807ca3f7f92015af7 Mon Sep 17 00:00:00 2001 From: Daniel Nezamabadi <55559979+dnezam@users.noreply.github.com> Date: Sat, 6 Dec 2025 13:35:20 +0100 Subject: [PATCH 10/42] [mlsexp] Use isSpace --- basis/pure/mlsexpScript.sml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/basis/pure/mlsexpScript.sml b/basis/pure/mlsexpScript.sml index cdaf6c9ec5..386489cc41 100644 --- a/basis/pure/mlsexpScript.sml +++ b/basis/pure/mlsexpScript.sml @@ -72,7 +72,7 @@ Definition read_symbol_aux_def: read_symbol_aux [] acc = (implode (REVERSE acc), []) ∧ read_symbol_aux (c::cs) acc = - if MEM c ") \t\n" then (implode (REVERSE acc), (c::cs)) + if c = #")" ∨ isSpace c then (implode (REVERSE acc), (c::cs)) else read_symbol_aux cs (c::acc) End @@ -99,7 +99,7 @@ Definition lex_aux_def: (if depth = 0:num then INR (acc, []) else INL «lex_aux: missing closing parenthesis») ∧ lex_aux depth (c::cs) acc = - if MEM c " \t\n" then lex_aux depth cs acc + if isSpace c then lex_aux depth cs acc else if c = #"(" then lex_aux (depth + 1) cs (OPEN::acc) else if c = #")" then (if depth = 0 then INL «lex_aux: too many closing parenthesis» From a1ff5bd67eb7474eb6b162c1658c1d4694c9192f Mon Sep 17 00:00:00 2001 From: Daniel Nezamabadi <55559979+dnezam@users.noreply.github.com> Date: Sat, 6 Dec 2025 13:57:04 +0100 Subject: [PATCH 11/42] [mlsexp] Sketch SexpProg --- basis/SexpProgScript.sml | 93 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 basis/SexpProgScript.sml diff --git a/basis/SexpProgScript.sml b/basis/SexpProgScript.sml new file mode 100644 index 0000000000..7c4dabef39 --- /dev/null +++ b/basis/SexpProgScript.sml @@ -0,0 +1,93 @@ +(* + Module for parsing and pretty-printing s-expressions. +*) +Theory SexpProg +Ancestors + mlsexp + TextIOProg +Libs + preamble + ml_translatorLib (* translation_extends, register_type, .. *) + basisFunctionsLib (* add_cakeml *) + + +val _ = translation_extends "TextIOProg"; + +val _ = register_type “:mlsexp$sexp”; +val _ = register_type “:mlsexp$token”; + +Quote add_cakeml: + exception LexFail string +End + +Quote add_cakeml: + fun read_string_aux input acc = + case TextIO.b_input1 input of + NONE => raise LexFail "unterminated string literal" + | SOME c => + if c = #"\"" then (String.implode (List.rev acc), input) else + if c = #"\\" then + (case TextIO.b_input1 input of + NONE => read_string_aux input acc (* causes error: unterminated string *) + | SOME e => + (if e = #"\\" then read_string_aux input (#"\""::acc) else + if e = #"0" then read_string_aux input (#"\000"::acc) else + if e = #"n" then read_string_aux input (#"\n"::acc) else + if e = #"r" then read_string_aux input (#"\""::acc) else + if e = #"t" then read_string_aux input (#"\""::acc) else + raise LexFail "unrecognised escape")) + else + read_string_aux input (c::acc) +End + +Quote add_cakeml: + fun read_string input = read_string_aux input [] +End + +Quote add_cakeml: + fun read_symbol_aux input acc = + case TextIO.b_input1 input of + NONE => (String.implode (List.rev acc), input) + | SOME c => + if c = #")" orelse Char.isSpace c + then (String.implode (List.rev acc), input) + else read_symbol_aux input (c::acc) +End + +Quote add_cakeml: + fun read_symbol input = read_symbol input [] +End + +Quote add_cakeml: + fun lex_aux depth input acc = + case TextIO.b_input1 input of + NONE => if depth = 0 then (acc, input) + else raise LexFail "missing closing parenthesis" + | SOME c => + if Char.isSpace c then lex_aux depth input acc + else if c = #"(" then lex_aux (depth + 1) input (OPEN::acc) + else if c = #")" then + (if depth = 0 then raise LexFail "too many closing parenthesis" + else if depth = 1 then (CLOSE::acc, input) + else lex_aux (depth - 1) input (CLOSE::acc)) + else if c = #"\"" then + let val (s, input) = read_string input in + if depth = 0 then ([SYMBOL s], input) + else lex_aux depth input ((SYMBOL s)::acc) end + else + let val (s, input) = read_symbol input in + if depth = 0 then ([SYMBOL s], input) + else lex_aux depth input ((SYMBOL s)::acc) end +End + +Quote add_cakeml: + fun lex input = lex_aux 0 input [] +End + +val r = translate mlsexpTheory.parse_aux_def; + +Quote add_cakeml: + fun parse input = let + val (rev_toks, input) = lex input + in (parse_aux rev_toks [] [], input) end +End From 2e794c83ae685844516ff9273d5c608d1ec99bca Mon Sep 17 00:00:00 2001 From: Magnus Myreen Date: Sat, 6 Dec 2025 18:16:54 +0100 Subject: [PATCH 12/42] Minor fix to parse + start on pretty printing --- basis/pure/mlsexpScript.sml | 50 +++++++++++++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/basis/pure/mlsexpScript.sml b/basis/pure/mlsexpScript.sml index 386489cc41..62e239ecd6 100644 --- a/basis/pure/mlsexpScript.sml +++ b/basis/pure/mlsexpScript.sml @@ -157,16 +157,56 @@ End (* Parses (at most) one S-expression, and returns the rest of the input. Fails exactly when lexing fails. *) Definition parse_def: - parse (input: string) : mlstring + (sexp list # string) = + parse (input: string) : mlstring + (sexp # string) = case lex input of | INL msg => INL msg - | INR (rev_toks, rest) => INR (parse_aux rev_toks [] [], rest) + | INR (rev_toks, rest) => + case parse_aux rev_toks [] [] of + | [] => INL «parse: empty input» + | (v::_) => INR (v, rest) End Theorem test_parse[local]: - parse " 1 2 3 " = INR ([Atom «1»]," 2 3 ") ∧ - parse "(1 2 3)" = INR ([Expr [Atom «1»; Atom «2»; Atom «3»]],"") ∧ - parse "(()) ()" = INR ([Expr [Expr []]]," ()") + parse " 1 2 3 " = INR (Atom «1»," 2 3 ") ∧ + parse "(1 2 3)" = INR (Expr [Atom «1»; Atom «2»; Atom «3»],"") ∧ + parse "(()) ()" = INR (Expr [Expr []]," ()") Proof EVAL_TAC QED + +(*--------------------------------------------------------------* + Pretty printing + *--------------------------------------------------------------*) + +Definition to_tokens_def: + to_tokens (Atom a) = [SYMBOL a] ∧ + to_tokens (Expr xs) = [OPEN] ++ FLAT (MAP (λx. to_tokens x) xs) ++ [CLOSE] +End + +Theorem parse_aux_to_tokens_lemma[local]: + ∀x xs s rest. + parse_aux (REVERSE (to_tokens x) ++ rest) xs s = + parse_aux rest (x::xs) s +Proof + ho_match_mp_tac to_tokens_ind \\ rpt strip_tac + >- fs [to_tokens_def,parse_aux_def] + \\ fs [to_tokens_def,parse_aux_def] + \\ qsuff_tac + ‘∀rest ys s. + parse_aux (REVERSE (FLAT (MAP (λx. to_tokens x) xs)) ++ rest) ys s = + parse_aux rest (xs ++ ys) s’ + >- + (strip_tac + \\ full_simp_tac std_ss [SF ETA_ss, GSYM APPEND_ASSOC] + \\ simp [parse_aux_def]) + \\ Induct_on ‘xs’ using SNOC_INDUCT >- fs [] + \\ fs [SF DNF_ss, SNOC_APPEND] + \\ full_simp_tac std_ss [GSYM APPEND_ASSOC, APPEND] +QED + +Theorem parse_aux_to_tokens_thm: + parse_aux (REVERSE (to_tokens x)) [] [] = [x] +Proof + CONV_TAC (PATH_CONV "lrl" (ONCE_REWRITE_CONV [GSYM APPEND_NIL |> cj 1])) + \\ rewrite_tac [parse_aux_to_tokens_lemma] \\ fs [parse_aux_def] +QED From 94a2043a49250cfde233825a3bd2028846d87727 Mon Sep 17 00:00:00 2001 From: Daniel Nezamabadi <55559979+dnezam@users.noreply.github.com> Date: Sat, 6 Dec 2025 17:59:27 +0100 Subject: [PATCH 13/42] [mlsexp] mlsexp: return rest of input even in case of failure --- basis/pure/mlsexpScript.sml | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/basis/pure/mlsexpScript.sml b/basis/pure/mlsexpScript.sml index 62e239ecd6..25b10e8199 100644 --- a/basis/pure/mlsexpScript.sml +++ b/basis/pure/mlsexpScript.sml @@ -24,9 +24,13 @@ Datatype: token = OPEN | CLOSE | SYMBOL mlstring End +(* Functions may not consume the entire input (string), so they always return + the rest of the string, independent of success or failure. *) +Type result[local,pp] = “:(α # string) + (β # string)” + Definition read_string_aux_def: read_string_aux [] acc = - INL «read_string_aux: unterminated string literal» ∧ + INL («read_string_aux: unterminated string literal», []) ∧ read_string_aux (c::rest) acc = if c = #"\"" then INR (implode (REVERSE acc), rest) else if c = #"\\" then @@ -38,7 +42,7 @@ Definition read_string_aux_def: if e = #"n" then read_string_aux rest (#"\n"::acc) else if e = #"r" then read_string_aux rest (#"\""::acc) else if e = #"t" then read_string_aux rest (#"\""::acc) else - INL «read_string_aux: unrecognised escape»)) + INL («read_string_aux: unrecognised escape», rest))) else read_string_aux rest (c::acc) End @@ -47,7 +51,7 @@ End Fails with an error message if closing quotes are missing or an unrecognised escape sequence occurs. *) Definition read_string_def: - read_string (input: string) : mlstring + (mlstring # string) = + read_string (input: string) : (mlstring, mlstring) result = read_string_aux input [] End @@ -95,19 +99,19 @@ QED (* By tracking the depth, we can ensure we only lex one S-expression at a time. *) Definition lex_aux_def: - lex_aux depth [] acc : mlstring + (token list # string) = + lex_aux depth [] acc : (mlstring, token list) result = (if depth = 0:num then INR (acc, []) - else INL «lex_aux: missing closing parenthesis») ∧ + else INL («lex_aux: missing closing parenthesis», [])) ∧ lex_aux depth (c::cs) acc = if isSpace c then lex_aux depth cs acc else if c = #"(" then lex_aux (depth + 1) cs (OPEN::acc) else if c = #")" then - (if depth = 0 then INL «lex_aux: too many closing parenthesis» + (if depth = 0 then INL («lex_aux: too many closing parenthesis», cs) else if depth = 1 then INR (CLOSE::acc, cs) else lex_aux (depth - 1) cs (CLOSE::acc)) else if c = #"\"" then case read_string cs of - | INL msg => INL msg + | INL (msg, rest) => INL (msg, rest) | INR (s, rest) => if depth = 0 then INR ([SYMBOL s], rest) else lex_aux depth rest ((SYMBOL s)::acc) @@ -129,7 +133,7 @@ End Fails with an error message if parentheses are unbalanced or read_string fails on a string literal. *) Definition lex_def: - lex (input: string) : mlstring + (token list # string) = + lex (input: string) : (mlstring, token list) result = lex_aux 0 input [] End @@ -138,8 +142,8 @@ Theorem test_lex[local]: lex "\"\n \" 2" = INR ([SYMBOL «\n »]," 2") ∧ lex " (1 2) 3 " = INR ([CLOSE; SYMBOL «2»; SYMBOL «1»; OPEN]," 3 ") ∧ lex " (1 2) ) " = INR ([CLOSE; SYMBOL «2»; SYMBOL «1»; OPEN]," ) ") ∧ - lex " (1 2 " = INL «lex_aux: missing closing parenthesis» ∧ - lex " ) (1 2) " = INL «lex_aux: too many closing parenthesis» + lex " (1 2 " = INL («lex_aux: missing closing parenthesis», "") ∧ + lex " ) (1 2) " = INL («lex_aux: too many closing parenthesis», " (1 2) ") Proof EVAL_TAC QED @@ -157,12 +161,12 @@ End (* Parses (at most) one S-expression, and returns the rest of the input. Fails exactly when lexing fails. *) Definition parse_def: - parse (input: string) : mlstring + (sexp # string) = + parse (input: string) : (mlstring, sexp) result = case lex input of - | INL msg => INL msg + | INL (msg, rest) => INL (msg, rest) | INR (rev_toks, rest) => case parse_aux rev_toks [] [] of - | [] => INL «parse: empty input» + | [] => INL («parse: empty input», rest) | (v::_) => INR (v, rest) End From 73c18517f252aecde9c1ead6224e280fc09aad74 Mon Sep 17 00:00:00 2001 From: Daniel Nezamabadi <55559979+dnezam@users.noreply.github.com> Date: Sat, 6 Dec 2025 19:42:55 +0100 Subject: [PATCH 14/42] Factor out get_exn_conv to basisFunctionsLib --- basis/TextIOProgScript.sml | 5 ----- basis/basisFunctionsLib.sig | 1 + basis/basisFunctionsLib.sml | 5 +++++ 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/basis/TextIOProgScript.sml b/basis/TextIOProgScript.sml index 1d184ca52d..a0ffca0264 100644 --- a/basis/TextIOProgScript.sml +++ b/basis/TextIOProgScript.sml @@ -82,10 +82,6 @@ End val _ = ml_prog_update open_local_block; -fun get_exn_conv name = - EVAL ``lookup_cons (Short ^name) ^(get_env (get_ml_prog_state ()))`` - |> concl |> rand |> rand |> rand - val BadFileName = get_exn_conv ``"BadFileName"`` val InvalidFD = get_exn_conv ``"InvalidFD"`` val EndOfFile = get_exn_conv ``"EndOfFile"`` @@ -785,4 +781,3 @@ End val _ = ml_prog_update close_local_blocks; val _ = ml_prog_update (close_module NONE); - diff --git a/basis/basisFunctionsLib.sig b/basis/basisFunctionsLib.sig index 0c88a0e672..f258201d8b 100644 --- a/basis/basisFunctionsLib.sig +++ b/basis/basisFunctionsLib.sig @@ -7,6 +7,7 @@ sig include Abbrev val get_module_prefix : unit -> string + val get_exn_conv : term -> term val trans : string -> term -> thm val append_dec : term -> unit val append_decs : term -> unit diff --git a/basis/basisFunctionsLib.sml b/basis/basisFunctionsLib.sml index 81cc49f9dd..6eef5b0cfb 100644 --- a/basis/basisFunctionsLib.sml +++ b/basis/basisFunctionsLib.sml @@ -16,6 +16,11 @@ fun get_module_prefix () = let | (m :: ms) => m ^ "_" end +fun get_exn_conv name = + EVAL “ml_prog$lookup_cons (namespace$Short ^name) + ^(get_env (get_ml_prog_state ()))” + |> concl |> rand |> rand |> rand + fun trans ml_name rhs = let val prefix = get_module_prefix () val hol_name = prefix ^ pick_name ml_name From 603859ef8f3a9659337e3363a9ac89c90ab7c71a Mon Sep 17 00:00:00 2001 From: Daniel Nezamabadi <55559979+dnezam@users.noreply.github.com> Date: Sat, 6 Dec 2025 20:54:47 +0100 Subject: [PATCH 15/42] Add Fail exception to mlbasicsProg --- basis/mlbasicsProgScript.sml | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/basis/mlbasicsProgScript.sml b/basis/mlbasicsProgScript.sml index ffd23eca11..ea4cac18f1 100644 --- a/basis/mlbasicsProgScript.sml +++ b/basis/mlbasicsProgScript.sml @@ -1,6 +1,8 @@ (* - Bind various built-in functions to function names that the parser - expects, e.g. the parser generates a call to a function called "+" + Translates a variety of basic constructs. + + In particular, we vind various built-in functions to function names that + the parser expects, e.g. the parser generates a call to a function called "+" when it parses 1+2. *) Theory mlbasicsProg @@ -131,3 +133,14 @@ val _ = translate pair_toString_def; val _ = (next_ml_names := ["compare"]); val _ = translate comparisonTheory.pair_cmp_def; val _ = ml_prog_update (close_module NONE); + + +Quote add_cakeml: + exception Fail string +End + +val Fail_ = get_exn_conv “"Fail"”; + +Definition Fail_exn_def: + Fail_exn s v = (∃sv. v = Conv (SOME ^Fail_) [sv] ∧ STRING_TYPE s sv) +End From 5ae1192dcce4c34df8e34788a0cadca75ce64fbf Mon Sep 17 00:00:00 2001 From: Daniel Nezamabadi <55559979+dnezam@users.noreply.github.com> Date: Sat, 6 Dec 2025 23:54:36 +0100 Subject: [PATCH 16/42] [mlsexp] Start sketching spec in SexpProg --- basis/SexpProgScript.sml | 119 +++++++++++++++++++++++++++++++-------- 1 file changed, 94 insertions(+), 25 deletions(-) diff --git a/basis/SexpProgScript.sml b/basis/SexpProgScript.sml index 7c4dabef39..a9628bfcf2 100644 --- a/basis/SexpProgScript.sml +++ b/basis/SexpProgScript.sml @@ -4,38 +4,42 @@ Theory SexpProg Ancestors mlsexp + ml_translator + std_prelude (* OPTION_TYPE *) + mlbasicsProg (* Fail_exn *) TextIOProg + TextIOProof Libs preamble ml_translatorLib (* translation_extends, register_type, .. *) basisFunctionsLib (* add_cakeml *) + cfLib (* x-tactics *) +(*--------------------------------------------------------------* + Translation + *--------------------------------------------------------------*) val _ = translation_extends "TextIOProg"; val _ = register_type “:mlsexp$sexp”; val _ = register_type “:mlsexp$token”; -Quote add_cakeml: - exception LexFail string -End - Quote add_cakeml: fun read_string_aux input acc = case TextIO.b_input1 input of - NONE => raise LexFail "unterminated string literal" - | SOME c => - if c = #"\"" then (String.implode (List.rev acc), input) else + None => raise Fail "read_string_aux: unterminated string literal" + | Some c => + if c = #"\"" then String.implode (List.rev acc) else if c = #"\\" then (case TextIO.b_input1 input of - NONE => read_string_aux input acc (* causes error: unterminated string *) - | SOME e => + None => read_string_aux input acc (* causes error: unterminated string *) + | Some e => (if e = #"\\" then read_string_aux input (#"\""::acc) else if e = #"0" then read_string_aux input (#"\000"::acc) else if e = #"n" then read_string_aux input (#"\n"::acc) else if e = #"r" then read_string_aux input (#"\""::acc) else if e = #"t" then read_string_aux input (#"\""::acc) else - raise LexFail "unrecognised escape")) + raise Fail "read_string_aux: unrecognised escape")) else read_string_aux input (c::acc) End @@ -47,10 +51,10 @@ End Quote add_cakeml: fun read_symbol_aux input acc = case TextIO.b_input1 input of - NONE => (String.implode (List.rev acc), input) - | SOME c => + None => String.implode (List.rev acc) + | Some c => if c = #")" orelse Char.isSpace c - then (String.implode (List.rev acc), input) + then String.implode (List.rev acc) else read_symbol_aux input (c::acc) End @@ -61,22 +65,22 @@ End Quote add_cakeml: fun lex_aux depth input acc = case TextIO.b_input1 input of - NONE => if depth = 0 then (acc, input) - else raise LexFail "missing closing parenthesis" - | SOME c => + None => if depth = 0 then acc + else raise Fail "lex_aux: missing closing parenthesis" + | Some c => if Char.isSpace c then lex_aux depth input acc else if c = #"(" then lex_aux (depth + 1) input (OPEN::acc) else if c = #")" then - (if depth = 0 then raise LexFail "too many closing parenthesis" - else if depth = 1 then (CLOSE::acc, input) + (if depth = 0 then raise Fail "lex_aux: too many closing parenthesis" + else if depth = 1 then CLOSE::acc else lex_aux (depth - 1) input (CLOSE::acc)) else if c = #"\"" then - let val (s, input) = read_string input in - if depth = 0 then ([SYMBOL s], input) + let val s = read_string input in + if depth = 0 then [SYMBOL s] else lex_aux depth input ((SYMBOL s)::acc) end else - let val (s, input) = read_symbol input in - if depth = 0 then ([SYMBOL s], input) + let val s = read_symbol input in + if depth = 0 then [SYMBOL s] else lex_aux depth input ((SYMBOL s)::acc) end End @@ -87,7 +91,72 @@ End val r = translate mlsexpTheory.parse_aux_def; Quote add_cakeml: - fun parse input = let - val (rev_toks, input) = lex input - in (parse_aux rev_toks [] [], input) end + fun parse input = parse_aux (lex input) [] [] End + +(*--------------------------------------------------------------* + Prove equivalence to mlsexp + *--------------------------------------------------------------*) + +val st = get_ml_prog_state (); + +Theorem lex_aux_spec: + ∀s depth depthv acc accv p is fs fd. + NUM depth depthv ∧ LIST_TYPE MLSEXP_TOKEN_TYPE acc accv ⇒ + app (p:'ffi ffi_proj) lex_aux_v [depthv; is; accv] + (STDIO fs * INSTREAM_STR fd is s fs) + (case lex_aux depth s acc of + | INL (msg, rest) => + POSTe exn. SEP_EXISTS k. + INSTREAM_STR fd is rest (forwardFD fs fd k) * + &(Fail_exn msg exn) + | INR (toks, rest) => + POSTv tokvs. SEP_EXISTS k. + STDIO (forwardFD fs fd k) * + INSTREAM_STR fd is rest (forwardFD fs fd k) * + &(LIST_TYPE MLSEXP_TOKEN_TYPE toks tokvs)) +Proof + cheat + (* rpt strip_tac *) + (* \\ xcf "lex_aux" st *) + (* \\ xlet ‘POSTv chv. SEP_EXISTS k. *) + (* STDIO (forwardFD fs fd k) * *) + (* INSTREAM_STR fd is (TL s) (forwardFD fs fd k) * *) + (* &OPTION_TYPE CHAR (oHD s) chv’ *) + (* >- (xapp_spec b_input1_spec_str) *) + (* \\ Cases_on ‘s’ \\ gvs [OPTION_TYPE_def] *) + (* \\ xmatch *) + (* >- *) + (* (xlet_auto >- xsimpl *) + (* \\ xif \\ simp [lex_aux_def] *) + (* >- (xvar \\ xsimpl \\ qexists ‘k’ \\ xsimpl) *) + (* \\ xlet_auto >- (xcon \\ xsimpl) *) + (* \\ xraise *) + (* \\ xsimpl \\ qexists ‘k’ \\ xsimpl *) + (* \\ simp [Fail_exn_def]) *) + (* \\ xlet_auto >- xsimpl *) + (* \\ xif *) + (* >- *) + (* (xsimpl *) + (* \\ xapp) *) + (* \\ cheat *) +QED + +Theorem lex_spec: + app (p:'ffi ffi_proj) lex_v [is] + (STDIO fs * INSTREAM_STR fd is s fs) + (case lex s of + | INL (msg, rest) => + POSTe exn. SEP_EXISTS k. + INSTREAM_STR fd is rest (forwardFD fs fd k) * + &(Fail_exn msg exn) + | INR (toks, rest) => + POSTv tokvs. SEP_EXISTS k. + STDIO (forwardFD fs fd k) * + INSTREAM_STR fd is rest (forwardFD fs fd k) * + &(LIST_TYPE MLSEXP_TOKEN_TYPE toks tokvs)) +Proof + xcf "lex" st + \\ xlet_auto >- (xcon \\ xsimpl) + \\ cheat +QED From 89cca18d27361a9d0632820b2786e8d815693f4c Mon Sep 17 00:00:00 2001 From: Daniel Nezamabadi <55559979+dnezam@users.noreply.github.com> Date: Sun, 7 Dec 2025 14:17:18 +0100 Subject: [PATCH 17/42] Move xlet_autop to cfLetAuto --- characteristic/cfLetAutoLib.sig | 2 ++ characteristic/cfLetAutoLib.sml | 5 +++++ examples/lpr_checker/array/lpr_arrayFullProgScript.sml | 2 -- examples/lpr_checker/array/lpr_arrayPackingProgScript.sml | 2 -- examples/lpr_checker/array/lpr_arrayParsingProgScript.sml | 3 --- examples/lpr_checker/array/lpr_arrayProgScript.sml | 2 -- examples/lpr_checker/array/lpr_arrayRamseyProgScript.sml | 2 -- examples/pseudo_bool/array/cliqueProgScript.sml | 2 -- examples/pseudo_bool/array/cnfProgScript.sml | 2 -- examples/pseudo_bool/array/graphProgScript.sml | 3 --- examples/pseudo_bool/array/mccisProgScript.sml | 2 -- examples/pseudo_bool/array/mcisProgScript.sml | 2 -- examples/pseudo_bool/array/npbc_arrayProgScript.sml | 2 -- examples/pseudo_bool/array/npbc_fullProgScript.sml | 2 -- examples/pseudo_bool/array/npbc_parseProgScript.sml | 2 -- examples/pseudo_bool/array/subgraph_isoProgScript.sml | 2 -- examples/pseudo_bool/array/wcnfProgScript.sml | 2 -- examples/scpog_checker/array/scpog_arrayFullProgScript.sml | 2 -- examples/scpog_checker/array/scpog_arrayProgScript.sml | 3 --- examples/xlrup_checker/array/xlrup_arrayFullProgScript.sml | 2 -- examples/xlrup_checker/array/xlrup_arrayProgScript.sml | 2 -- 21 files changed, 7 insertions(+), 41 deletions(-) diff --git a/characteristic/cfLetAutoLib.sig b/characteristic/cfLetAutoLib.sig index c1ca1e8fed..0a7688a5d2 100644 --- a/characteristic/cfLetAutoLib.sig +++ b/characteristic/cfLetAutoLib.sig @@ -93,6 +93,8 @@ signature cfLetAutoLib = sig (* xlet_auto is the default function to use *) val xlet_auto : tactic + (* xlet_autop tries to generate less side goals than xlet_auto *) + val xlet_autop : tactic (* debug_get_app_spec returns the last iteration of the manipulated app_spec - very useful when trying to figure out why xlet_auto failed *) val debug_get_app_spec : unit -> thm end diff --git a/characteristic/cfLetAutoLib.sml b/characteristic/cfLetAutoLib.sml index feeb9abaf5..2e0d8ed656 100644 --- a/characteristic/cfLetAutoLib.sml +++ b/characteristic/cfLetAutoLib.sml @@ -1905,8 +1905,13 @@ fun xlet_auto (g as (asl, w)) = handle HOL_ERR e => raise (ERR "xlet_auto" (message_of e)); +(* A version of xlet_auto that tries to generate less side goals. + The heuristics are derived from common patterns seen in the wild. *) +val xlet_autop = xlet_auto >- (TRY xcon \\ xsimpl); + end + (******** DEBUG ********) (* diff --git a/examples/lpr_checker/array/lpr_arrayFullProgScript.sml b/examples/lpr_checker/array/lpr_arrayFullProgScript.sml index 40254c6265..3cc42c2b56 100644 --- a/examples/lpr_checker/array/lpr_arrayFullProgScript.sml +++ b/examples/lpr_checker/array/lpr_arrayFullProgScript.sml @@ -14,8 +14,6 @@ val _ = set_trace "BasicProvers.var_eq_old" 1 val _ = translation_extends"lpr_arrayParsingProg"; -val xlet_autop = xlet_auto >- (TRY( xcon) >> xsimpl) - val _ = translate parse_header_line_def; val parse_header_line_side = Q.prove(` diff --git a/examples/lpr_checker/array/lpr_arrayPackingProgScript.sml b/examples/lpr_checker/array/lpr_arrayPackingProgScript.sml index ff4fc11589..87be2adc4b 100644 --- a/examples/lpr_checker/array/lpr_arrayPackingProgScript.sml +++ b/examples/lpr_checker/array/lpr_arrayPackingProgScript.sml @@ -16,8 +16,6 @@ val _ = set_trace "BasicProvers.var_eq_old" 1 val _ = translation_extends"lpr_arrayParsingProg"; -val xlet_autop = xlet_auto >- (TRY( xcon) >> xsimpl) - val usage_string = ‘ Input: diff --git a/examples/lpr_checker/array/lpr_arrayParsingProgScript.sml b/examples/lpr_checker/array/lpr_arrayParsingProgScript.sml index d7524fdbd9..36f88e72ed 100644 --- a/examples/lpr_checker/array/lpr_arrayParsingProgScript.sml +++ b/examples/lpr_checker/array/lpr_arrayParsingProgScript.sml @@ -14,8 +14,6 @@ val _ = set_trace "BasicProvers.var_eq_old" 1 val _ = translation_extends"lpr_arrayProg"; -val xlet_autop = xlet_auto >- (TRY( xcon) >> xsimpl) - (* TODO: Mostly copied from mlintTheory *) val result = translate (fromChar_unsafe_def |> REWRITE_RULE [GSYM ml_translatorTheory.sub_check_def]); @@ -1449,4 +1447,3 @@ Proof simp[Once CONJ_COMM]>> asm_exists_tac>>simp[] QED - diff --git a/examples/lpr_checker/array/lpr_arrayProgScript.sml b/examples/lpr_checker/array/lpr_arrayProgScript.sml index 7a93a8d80d..954ee514ba 100644 --- a/examples/lpr_checker/array/lpr_arrayProgScript.sml +++ b/examples/lpr_checker/array/lpr_arrayProgScript.sml @@ -84,8 +84,6 @@ val delete_literals_sing_arr_def = process_topdecs` if every_one_arr carr cs then ~c else raise Fail (format_failure lno "clause not empty or singleton after reduction")` |> append_prog -val xlet_autop = xlet_auto >- (TRY( xcon) >> xsimpl) - Theorem any_el_eq_EL[simp]: LENGTH Clist > index h ⇒ any_el (index h) Clist w8z = EL (index h) Clist diff --git a/examples/lpr_checker/array/lpr_arrayRamseyProgScript.sml b/examples/lpr_checker/array/lpr_arrayRamseyProgScript.sml index 8213615252..e28a2ce034 100644 --- a/examples/lpr_checker/array/lpr_arrayRamseyProgScript.sml +++ b/examples/lpr_checker/array/lpr_arrayRamseyProgScript.sml @@ -15,8 +15,6 @@ val _ = set_trace "BasicProvers.var_eq_old" 1 val _ = translation_extends"lpr_arrayParsingProg"; -val xlet_autop = xlet_auto >- (TRY( xcon) >> xsimpl) - (* This function is not specific to Ramsey, can take any encoder *) (* 0 arg *) diff --git a/examples/pseudo_bool/array/cliqueProgScript.sml b/examples/pseudo_bool/array/cliqueProgScript.sml index dee9cfac58..4ec96a8f4b 100644 --- a/examples/pseudo_bool/array/cliqueProgScript.sml +++ b/examples/pseudo_bool/array/cliqueProgScript.sml @@ -9,8 +9,6 @@ Libs val _ = translation_extends"graphProg"; -val xlet_autop = xlet_auto >- (TRY( xcon) >> xsimpl) - val res = translate enc_string_def; val res = translate pbcTheory.map_obj_def; val res = translate clique_obj_def; diff --git a/examples/pseudo_bool/array/cnfProgScript.sml b/examples/pseudo_bool/array/cnfProgScript.sml index a7827aa360..0b07953929 100644 --- a/examples/pseudo_bool/array/cnfProgScript.sml +++ b/examples/pseudo_bool/array/cnfProgScript.sml @@ -9,8 +9,6 @@ Libs val _ = translation_extends "npbc_parseProg"; -val xlet_autop = xlet_auto >- (TRY( xcon) >> xsimpl) - (* TODO: COPIED from lpr_arrayFullProgScript.sml *) Theorem fastForwardFD_ADELKEY_same[simp]: forwardFD fs fd n with infds updated_by ADELKEY fd = diff --git a/examples/pseudo_bool/array/graphProgScript.sml b/examples/pseudo_bool/array/graphProgScript.sml index 2854e324b5..f067222f73 100644 --- a/examples/pseudo_bool/array/graphProgScript.sml +++ b/examples/pseudo_bool/array/graphProgScript.sml @@ -9,8 +9,6 @@ Libs val _ = translation_extends"npbc_parseProg"; -val xlet_autop = xlet_auto >- (TRY( xcon) >> xsimpl) - Overload "graph_TYPE" = ``PAIR_TYPE NUM (SPTREE_SPT_TYPE (SPTREE_SPT_TYPE UNIT_TYPE))``; val res = translate is_edge_def; @@ -225,4 +223,3 @@ Proof xcon>>xsimpl>> simp[SUM_TYPE_def] QED - diff --git a/examples/pseudo_bool/array/mccisProgScript.sml b/examples/pseudo_bool/array/mccisProgScript.sml index 5f685f8257..c20a56ed55 100644 --- a/examples/pseudo_bool/array/mccisProgScript.sml +++ b/examples/pseudo_bool/array/mccisProgScript.sml @@ -9,8 +9,6 @@ Libs val _ = translation_extends"graphProg"; -val xlet_autop = xlet_auto >- (TRY( xcon) >> xsimpl) - (* The encoder *) val res = translate enc_string_def; diff --git a/examples/pseudo_bool/array/mcisProgScript.sml b/examples/pseudo_bool/array/mcisProgScript.sml index c1b4c202bb..8c54e9780e 100644 --- a/examples/pseudo_bool/array/mcisProgScript.sml +++ b/examples/pseudo_bool/array/mcisProgScript.sml @@ -9,8 +9,6 @@ Libs val _ = translation_extends"graphProg"; -val xlet_autop = xlet_auto >- (TRY( xcon) >> xsimpl) - (* The encoder *) val res = translate enc_string_def; diff --git a/examples/pseudo_bool/array/npbc_arrayProgScript.sml b/examples/pseudo_bool/array/npbc_arrayProgScript.sml index d2fac53430..4bb443e7f4 100644 --- a/examples/pseudo_bool/array/npbc_arrayProgScript.sml +++ b/examples/pseudo_bool/array/npbc_arrayProgScript.sml @@ -9,8 +9,6 @@ Ancestors val _ = translation_extends"UnsafeProg"; -val xlet_autop = xlet_auto >- (TRY( xcon) >> xsimpl) - val _ = process_topdecs ` exception Fail string; ` |> append_prog diff --git a/examples/pseudo_bool/array/npbc_fullProgScript.sml b/examples/pseudo_bool/array/npbc_fullProgScript.sml index 2cddf2b2fe..019f8bf71b 100644 --- a/examples/pseudo_bool/array/npbc_fullProgScript.sml +++ b/examples/pseudo_bool/array/npbc_fullProgScript.sml @@ -9,8 +9,6 @@ Libs val _ = translation_extends"npbc_parseProg"; -val xlet_autop = xlet_auto >- (TRY( xcon) >> xsimpl) - (* Translation for parsing an OPB file *) val r = translate nocomment_line_def; diff --git a/examples/pseudo_bool/array/npbc_parseProgScript.sml b/examples/pseudo_bool/array/npbc_parseProgScript.sml index 6fe55cbfb7..dd97e4cf2c 100644 --- a/examples/pseudo_bool/array/npbc_parseProgScript.sml +++ b/examples/pseudo_bool/array/npbc_parseProgScript.sml @@ -11,8 +11,6 @@ val _ = translation_extends"npbc_arrayProg"; val () = computeLib.set_skip computeLib.the_compset “COND” (SOME 1); -val xlet_autop = xlet_auto >- (TRY( xcon) >> xsimpl) - val r = translate strip_numbers_aux_def; val strip_numbers_aux_side_def = theorem "strip_numbers_aux_side_def"; val strip_numbers_aux_side = Q.prove( diff --git a/examples/pseudo_bool/array/subgraph_isoProgScript.sml b/examples/pseudo_bool/array/subgraph_isoProgScript.sml index 691992a0b3..064928c5e3 100644 --- a/examples/pseudo_bool/array/subgraph_isoProgScript.sml +++ b/examples/pseudo_bool/array/subgraph_isoProgScript.sml @@ -9,8 +9,6 @@ Libs val _ = translation_extends "graphProg"; -val xlet_autop = xlet_auto >- (TRY( xcon) >> xsimpl) - (* The encoder *) (* Translate the string converter *) diff --git a/examples/pseudo_bool/array/wcnfProgScript.sml b/examples/pseudo_bool/array/wcnfProgScript.sml index eed79fe457..9a69dbbbe4 100644 --- a/examples/pseudo_bool/array/wcnfProgScript.sml +++ b/examples/pseudo_bool/array/wcnfProgScript.sml @@ -9,8 +9,6 @@ Libs val _ = translation_extends "npbc_parseProg"; -val xlet_autop = xlet_auto >- (TRY( xcon) >> xsimpl) - (* TODO: COPIED from lpr_arrayFullProgScript.sml *) Theorem fastForwardFD_ADELKEY_same[simp]: forwardFD fs fd n with infds updated_by ADELKEY fd = diff --git a/examples/scpog_checker/array/scpog_arrayFullProgScript.sml b/examples/scpog_checker/array/scpog_arrayFullProgScript.sml index 17fda87c16..bfff4c3162 100644 --- a/examples/scpog_checker/array/scpog_arrayFullProgScript.sml +++ b/examples/scpog_checker/array/scpog_arrayFullProgScript.sml @@ -12,8 +12,6 @@ val _ = diminish_srw_ss ["ABBREV"] val _ = translation_extends"scpog_arrayProg"; -val xlet_autop = xlet_auto >- (TRY( xcon) >> xsimpl) - val res = translate opt_union_def; val res = translate parse_show_def; diff --git a/examples/scpog_checker/array/scpog_arrayProgScript.sml b/examples/scpog_checker/array/scpog_arrayProgScript.sml index 9512f4e343..42cd7c2f4f 100644 --- a/examples/scpog_checker/array/scpog_arrayProgScript.sml +++ b/examples/scpog_checker/array/scpog_arrayProgScript.sml @@ -83,8 +83,6 @@ val delete_literals_sing_arr_def = process_topdecs` if every_one_arr carr cs then ~c else raise Fail (format_failure lno ("clause at index not empty or singleton after reduction: " ^ Int.toString i))` |> append_prog -val xlet_autop = xlet_auto >- (TRY( xcon) >> xsimpl) - Theorem update_resize_LUPDATE[simp]: LENGTH Clist > index h ⇒ update_resize Clist w8z v (index h) = LUPDATE v (index h) Clist @@ -2385,4 +2383,3 @@ Proof xsimpl>> metis_tac[] QED - diff --git a/examples/xlrup_checker/array/xlrup_arrayFullProgScript.sml b/examples/xlrup_checker/array/xlrup_arrayFullProgScript.sml index bb6e09cf39..0cb5085e33 100644 --- a/examples/xlrup_checker/array/xlrup_arrayFullProgScript.sml +++ b/examples/xlrup_checker/array/xlrup_arrayFullProgScript.sml @@ -14,8 +14,6 @@ val _ = set_trace "BasicProvers.var_eq_old" 1 val _ = translation_extends"xlrup_arrayProg"; -val xlet_autop = xlet_auto >- (TRY( xcon) >> xsimpl) - val _ = translate parse_header_line_def; val parse_header_line_side = Q.prove(` diff --git a/examples/xlrup_checker/array/xlrup_arrayProgScript.sml b/examples/xlrup_checker/array/xlrup_arrayProgScript.sml index 38195e24c5..44a68ca2f1 100644 --- a/examples/xlrup_checker/array/xlrup_arrayProgScript.sml +++ b/examples/xlrup_checker/array/xlrup_arrayProgScript.sml @@ -89,8 +89,6 @@ val delete_literals_sing_arr_def = process_topdecs` if every_one_arr carr cs then ~c else raise Fail (format_failure lno ("clause at index not empty or singleton after reduction: " ^ Int.toString i))` |> append_prog -val xlet_autop = xlet_auto >- (TRY( xcon) >> xsimpl) - Theorem list_lookup_eq_EL[simp]: LENGTH Clist > index h ⇒ list_lookup Clist w8z (index h) = EL (index h) Clist From b134d94006862cd867b3083caf8aa61bbe6d1eed Mon Sep 17 00:00:00 2001 From: Daniel Nezamabadi <55559979+dnezam@users.noreply.github.com> Date: Sun, 7 Dec 2025 15:48:39 +0100 Subject: [PATCH 18/42] [mlsexp] Try to make progress in proofs --- basis/SexpProgScript.sml | 253 ++++++++++++++++++++++++++++++--------- 1 file changed, 198 insertions(+), 55 deletions(-) diff --git a/basis/SexpProgScript.sml b/basis/SexpProgScript.sml index a9628bfcf2..7605d04fd5 100644 --- a/basis/SexpProgScript.sml +++ b/basis/SexpProgScript.sml @@ -7,6 +7,7 @@ Ancestors ml_translator std_prelude (* OPTION_TYPE *) mlbasicsProg (* Fail_exn *) + fsFFIProps (* forwardFD_o *) TextIOProg TextIOProof Libs @@ -69,19 +70,19 @@ Quote add_cakeml: else raise Fail "lex_aux: missing closing parenthesis" | Some c => if Char.isSpace c then lex_aux depth input acc - else if c = #"(" then lex_aux (depth + 1) input (OPEN::acc) + else if c = #"(" then lex_aux (depth + 1) input (Open::acc) else if c = #")" then (if depth = 0 then raise Fail "lex_aux: too many closing parenthesis" - else if depth = 1 then CLOSE::acc - else lex_aux (depth - 1) input (CLOSE::acc)) + else if depth = 1 then Close::acc + else lex_aux (depth - 1) input (Close::acc)) else if c = #"\"" then let val s = read_string input in - if depth = 0 then [SYMBOL s] - else lex_aux depth input ((SYMBOL s)::acc) end + if depth = 0 then [Symbol s] + else lex_aux depth input ((Symbol s)::acc) end else let val s = read_symbol input in - if depth = 0 then [SYMBOL s] - else lex_aux depth input ((SYMBOL s)::acc) end + if depth = 0 then [Symbol s] + else lex_aux depth input ((Symbol s)::acc) end End Quote add_cakeml: @@ -100,63 +101,205 @@ End val st = get_ml_prog_state (); -Theorem lex_aux_spec: - ∀s depth depthv acc accv p is fs fd. - NUM depth depthv ∧ LIST_TYPE MLSEXP_TOKEN_TYPE acc accv ⇒ - app (p:'ffi ffi_proj) lex_aux_v [depthv; is; accv] - (STDIO fs * INSTREAM_STR fd is s fs) - (case lex_aux depth s acc of - | INL (msg, rest) => +Definition read_string_aux_post_def: + read_string_aux_post s acc fs is fd = + (case read_string_aux s acc of + | INL (msg, rest) => POSTe exn. SEP_EXISTS k. INSTREAM_STR fd is rest (forwardFD fs fd k) * &(Fail_exn msg exn) - | INR (toks, rest) => - POSTv tokvs. SEP_EXISTS k. + | INR (slit, rest) => + POSTv slitv. SEP_EXISTS k. STDIO (forwardFD fs fd k) * INSTREAM_STR fd is rest (forwardFD fs fd k) * - &(LIST_TYPE MLSEXP_TOKEN_TYPE toks tokvs)) + &(STRING_TYPE slit slitv)) +End + +Theorem read_string_aux_spec: + ∀s acc accv p is fs fd. + LIST_TYPE CHAR acc accv ⇒ + app (p:'ffi ffi_proj) read_string_aux_v [is; accv] + (STDIO fs * INSTREAM_STR fd is s fs) + (read_string_aux_post s acc fs is fd) Proof - cheat - (* rpt strip_tac *) - (* \\ xcf "lex_aux" st *) - (* \\ xlet ‘POSTv chv. SEP_EXISTS k. *) - (* STDIO (forwardFD fs fd k) * *) - (* INSTREAM_STR fd is (TL s) (forwardFD fs fd k) * *) - (* &OPTION_TYPE CHAR (oHD s) chv’ *) - (* >- (xapp_spec b_input1_spec_str) *) - (* \\ Cases_on ‘s’ \\ gvs [OPTION_TYPE_def] *) - (* \\ xmatch *) - (* >- *) - (* (xlet_auto >- xsimpl *) - (* \\ xif \\ simp [lex_aux_def] *) - (* >- (xvar \\ xsimpl \\ qexists ‘k’ \\ xsimpl) *) - (* \\ xlet_auto >- (xcon \\ xsimpl) *) - (* \\ xraise *) - (* \\ xsimpl \\ qexists ‘k’ \\ xsimpl *) - (* \\ simp [Fail_exn_def]) *) - (* \\ xlet_auto >- xsimpl *) - (* \\ xif *) - (* >- *) - (* (xsimpl *) - (* \\ xapp) *) - (* \\ cheat *) + Induct + \\ rpt strip_tac + \\ qmatch_goalsub_abbrev_tac ‘read_string_aux_post s₁’ + \\ xcf "read_string_aux" st + >- + (xlet ‘POSTv chv. SEP_EXISTS k. + STDIO (forwardFD fs fd k) * + INSTREAM_STR fd is (TL s₁) (forwardFD fs fd k) * + &OPTION_TYPE CHAR (oHD s₁) chv’ + >- (xapp_spec b_input1_spec_str) + \\ unabbrev_all_tac \\ gvs [OPTION_TYPE_def] + \\ xmatch + \\ xlet_autop + \\ xraise + \\ simp [read_string_aux_post_def, read_string_aux_def] \\ xsimpl + \\ qexists ‘k’ \\ xsimpl + \\ simp [Fail_exn_def]) + >- + (xlet ‘POSTv chv. SEP_EXISTS k. + STDIO (forwardFD fs fd k) * + INSTREAM_STR fd is (TL s₁) (forwardFD fs fd k) * + &OPTION_TYPE CHAR (oHD s₁) chv’ + >- (xapp_spec b_input1_spec_str) + \\ unabbrev_all_tac \\ gvs [OPTION_TYPE_def] + \\ xmatch + \\ xlet_autop + \\ xif + >- + (xlet_autop + \\ xapp \\ xsimpl + \\ first_assum $ irule_at (Pos hd) + \\ rpt strip_tac + \\ simp [read_string_aux_post_def, read_string_aux_def] \\ xsimpl + \\ qexists ‘k’ \\ xsimpl) + \\ xlet_autop + \\ xif + >- + (rename [‘read_string_aux_post (STRING _ s)’] + \\ xlet ‘POSTv chv. SEP_EXISTS k₁. + STDIO (forwardFD fs fd (k + k₁)) * + INSTREAM_STR fd is (TL s) (forwardFD fs fd (k + k₁)) * + &OPTION_TYPE CHAR (oHD s) chv’ + >- + (xapp_spec b_input1_spec_str + \\ qexistsl [‘emp’, ‘s’, ‘forwardFD fs fd k’, ‘fd’] + \\ simp [forwardFD_o] \\ xsimpl) + \\ Cases_on ‘s’ \\ gvs [OPTION_TYPE_def] + >- + (xmatch \\ xapp + \\ first_assum $ irule_at (Pos hd) + \\ qexistsl [‘forwardFD fs fd (k + k₁)’, ‘fd’, ‘emp’] + \\ conj_tac >- xsimpl + \\ simp [read_string_aux_post_def, read_string_aux_def] + \\ xsimpl + \\ rpt strip_tac \\ simp [forwardFD_o] + \\ qmatch_goalsub_abbrev_tac ‘forwardFD fs fd kpx’ + \\ qexists ‘kpx’ \\ xsimpl) + \\ xmatch + \\ xlet_autop + \\ xif + >- cheat + (* (xlet_autop *) + (* \\ xapp *) + (* \\ qexistsl [‘emp’, ‘forwardFD fs fd (k + k₁)’, ‘fd’, ‘#"\""::acc’] *) + (* \\ simp [LIST_TYPE_def] *) + (* \\ conj_tac >- (cheat) *) + (* \\ simp [read_string_aux_post_def, read_string_aux_def] *) + (* \\ cheat *) + (* ) *) + \\ cheat) + \\ cheat) QED -Theorem lex_spec: - app (p:'ffi ffi_proj) lex_v [is] - (STDIO fs * INSTREAM_STR fd is s fs) - (case lex s of + +Definition lex_aux_post_def: + lex_aux_post depth s acc fs is fd = + (case lex_aux depth s acc of | INL (msg, rest) => - POSTe exn. SEP_EXISTS k. - INSTREAM_STR fd is rest (forwardFD fs fd k) * - &(Fail_exn msg exn) + POSTe exn. SEP_EXISTS k. + INSTREAM_STR fd is rest (forwardFD fs fd k) * + &(Fail_exn msg exn) | INR (toks, rest) => - POSTv tokvs. SEP_EXISTS k. - STDIO (forwardFD fs fd k) * - INSTREAM_STR fd is rest (forwardFD fs fd k) * - &(LIST_TYPE MLSEXP_TOKEN_TYPE toks tokvs)) + POSTv tokvs. SEP_EXISTS k. + STDIO (forwardFD fs fd k) * + INSTREAM_STR fd is rest (forwardFD fs fd k) * + &(LIST_TYPE MLSEXP_TOKEN_TYPE toks tokvs)) +End + +val MLSEXP_TOKEN_TYPE_def = theorem "MLSEXP_TOKEN_TYPE_def"; + +(* Solve goals of the form + + lex_aux_post ... (forwardFD fs fd k) is fd *+ emp ==+> + lex_aux_post ... fs is fd *+ GC + + This usually comes up as part of a recursive call. *) +val lex_aux_SEP_IMPPOST_tac = + (simp [lex_aux_post_def, lex_aux_def, isSpace_def] + \\ ntac 2 CASE_TAC + \\ simp [forwardFD_o] + \\ xsimpl \\ rpt strip_tac + \\ qmatch_goalsub_abbrev_tac ‘forwardFD fs fd kpx’ + \\ qexists ‘kpx’ \\ xsimpl); + +(* Finish up the goal when we are returning. + Should be preceeded by an x-tactic (xraise, xcon, ...) *) +val lex_aux_ret_tac = + (simp [lex_aux_post_def, lex_aux_def] \\ xsimpl + \\ qexists ‘k’ \\ xsimpl + \\ simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def, Fail_exn_def]); + +Theorem lex_aux_spec: + ∀s depth depthv acc accv p is fs fd. + NUM depth depthv ∧ LIST_TYPE MLSEXP_TOKEN_TYPE acc accv ⇒ + app (p:'ffi ffi_proj) lex_aux_v [depthv; is; accv] + (STDIO fs * INSTREAM_STR fd is s fs) + (lex_aux_post depth s acc fs is fd) Proof - xcf "lex" st - \\ xlet_auto >- (xcon \\ xsimpl) - \\ cheat + Induct + \\ rpt strip_tac + \\ qmatch_goalsub_abbrev_tac ‘lex_aux_post _ s₁’ + \\ xcf "lex_aux" st + >- + (xlet ‘POSTv chv. SEP_EXISTS k. + STDIO (forwardFD fs fd k) * + INSTREAM_STR fd is (TL s₁) (forwardFD fs fd k) * + &OPTION_TYPE CHAR (oHD s₁) chv’ + >- (xapp_spec b_input1_spec_str) + \\ unabbrev_all_tac \\ gvs [OPTION_TYPE_def] + \\ xmatch + \\ xlet_autop + \\ xif + >- + (xvar + \\ simp [lex_aux_post_def, lex_aux_def] \\ xsimpl + >- (qexists ‘k’ \\ xsimpl)) + \\ xlet_autop + \\ xraise \\ lex_aux_ret_tac) + >- + (xlet ‘POSTv chv. SEP_EXISTS k. + STDIO (forwardFD fs fd k) * + INSTREAM_STR fd is (TL s₁) (forwardFD fs fd k) * + &OPTION_TYPE CHAR (oHD s₁) chv’ + >- (xapp_spec b_input1_spec_str) + \\ unabbrev_all_tac \\ gvs [OPTION_TYPE_def] + \\ xmatch + \\ xlet_autop + \\ xif >- + (last_x_assum $ drule_all_then assume_tac + \\ xapp + \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’] + \\ xpull >- xsimpl + \\ lex_aux_SEP_IMPPOST_tac) + \\ xlet_autop + \\ xif >- + (ntac 3 xlet_autop + \\ xapp + \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’, ‘depth + 1’, ‘OPEN::acc’] + \\ simp [] + \\ conj_tac >- (simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def]) + \\ xpull >- xsimpl + \\ lex_aux_SEP_IMPPOST_tac) + \\ xlet_autop + \\ xif >- + (xlet_autop + \\ xif >- (xlet_autop \\ xraise \\ lex_aux_ret_tac) + \\ xlet_autop + \\ xif >- (xlet_autop \\ xcon \\ lex_aux_ret_tac) + \\ ntac 3 xlet_autop + \\ xapp + \\ qexistsl + [‘emp’, ‘forwardFD fs fd k’, ‘fd’, ‘depth - 1’, ‘CLOSE::acc’] + \\ simp [] + \\ conj_tac >- (simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def]) + \\ xpull >- xsimpl + \\ lex_aux_SEP_IMPPOST_tac) + \\ xlet_autop + \\ xif >- + (cheat) + \\ cheat) QED From dd675e66c3fe534e9e139eec6dd3bba546637c8c Mon Sep 17 00:00:00 2001 From: Daniel Nezamabadi <55559979+dnezam@users.noreply.github.com> Date: Sun, 7 Dec 2025 17:41:55 +0100 Subject: [PATCH 19/42] [mlsexp] Prove read_string_aux_spec --- basis/SexpProgScript.sml | 223 +++++++++++++++++++++------------------ 1 file changed, 121 insertions(+), 102 deletions(-) diff --git a/basis/SexpProgScript.sml b/basis/SexpProgScript.sml index 7605d04fd5..59972936cd 100644 --- a/basis/SexpProgScript.sml +++ b/basis/SexpProgScript.sml @@ -115,6 +115,20 @@ Definition read_string_aux_post_def: &(STRING_TYPE slit slitv)) End +(* Can be used in read_string_aux_spec to finish proofs about base cases. + * k indicates how much we moved forward (passed to forwardFD) *) +fun read_string_aux_base_tac k = + (simp [read_string_aux_post_def, read_string_aux_def] \\ xsimpl + \\ qexists k \\ xsimpl \\ simp [Fail_exn_def]); + +(* Can be used in read_string_aux_spec to finish proofs about recursive cases. *) +val read_string_aux_rec_tac = + (conj_tac >- xsimpl + \\ xsimpl + \\ rpt strip_tac \\ simp [forwardFD_o] + \\ qmatch_goalsub_abbrev_tac ‘forwardFD fs fd kpx’ + \\ qexists ‘kpx’ \\ xsimpl); + Theorem read_string_aux_spec: ∀s acc accv p is fs fd. LIST_TYPE CHAR acc accv ⇒ @@ -122,10 +136,11 @@ Theorem read_string_aux_spec: (STDIO fs * INSTREAM_STR fd is s fs) (read_string_aux_post s acc fs is fd) Proof - Induct + ho_match_mp_tac read_string_aux_ind \\ rpt strip_tac \\ qmatch_goalsub_abbrev_tac ‘read_string_aux_post s₁’ \\ xcf "read_string_aux" st + (* [] *) >- (xlet ‘POSTv chv. SEP_EXISTS k. STDIO (forwardFD fs fd k) * @@ -133,12 +148,9 @@ Proof &OPTION_TYPE CHAR (oHD s₁) chv’ >- (xapp_spec b_input1_spec_str) \\ unabbrev_all_tac \\ gvs [OPTION_TYPE_def] - \\ xmatch - \\ xlet_autop - \\ xraise - \\ simp [read_string_aux_post_def, read_string_aux_def] \\ xsimpl - \\ qexists ‘k’ \\ xsimpl - \\ simp [Fail_exn_def]) + \\ xmatch \\ xlet_autop \\ xraise + \\ read_string_aux_base_tac ‘k’) + (* c::rest *) >- (xlet ‘POSTv chv. SEP_EXISTS k. STDIO (forwardFD fs fd k) * @@ -146,18 +158,16 @@ Proof &OPTION_TYPE CHAR (oHD s₁) chv’ >- (xapp_spec b_input1_spec_str) \\ unabbrev_all_tac \\ gvs [OPTION_TYPE_def] - \\ xmatch - \\ xlet_autop + \\ xmatch \\ xlet_autop \\ xif - >- + >- (* c = " *) (xlet_autop \\ xapp \\ xsimpl \\ first_assum $ irule_at (Pos hd) \\ rpt strip_tac - \\ simp [read_string_aux_post_def, read_string_aux_def] \\ xsimpl - \\ qexists ‘k’ \\ xsimpl) + \\ read_string_aux_base_tac ‘k’) \\ xlet_autop - \\ xif + \\ xif (* c = \ *) >- (rename [‘read_string_aux_post (STRING _ s)’] \\ xlet ‘POSTv chv. SEP_EXISTS k₁. @@ -169,33 +179,40 @@ Proof \\ qexistsl [‘emp’, ‘s’, ‘forwardFD fs fd k’, ‘fd’] \\ simp [forwardFD_o] \\ xsimpl) \\ Cases_on ‘s’ \\ gvs [OPTION_TYPE_def] - >- + >- (* Nothing after \ *) (xmatch \\ xapp \\ first_assum $ irule_at (Pos hd) \\ qexistsl [‘forwardFD fs fd (k + k₁)’, ‘fd’, ‘emp’] - \\ conj_tac >- xsimpl \\ simp [read_string_aux_post_def, read_string_aux_def] - \\ xsimpl - \\ rpt strip_tac \\ simp [forwardFD_o] - \\ qmatch_goalsub_abbrev_tac ‘forwardFD fs fd kpx’ - \\ qexists ‘kpx’ \\ xsimpl) + \\ read_string_aux_rec_tac) \\ xmatch - \\ xlet_autop - \\ xif - >- cheat - (* (xlet_autop *) - (* \\ xapp *) - (* \\ qexistsl [‘emp’, ‘forwardFD fs fd (k + k₁)’, ‘fd’, ‘#"\""::acc’] *) - (* \\ simp [LIST_TYPE_def] *) - (* \\ conj_tac >- (cheat) *) - (* \\ simp [read_string_aux_post_def, read_string_aux_def] *) - (* \\ cheat *) - (* ) *) - \\ cheat) - \\ cheat) + (* escape characters *) + \\ ntac 5 ( + xlet_autop + \\ xif + >- + (xlet_autop + \\ gvs [] + \\ xapp + \\ simp [LIST_TYPE_def] + \\ qexistsl [‘emp’, ‘forwardFD fs fd (k + k₁)’, ‘fd’] + \\ simp [read_string_aux_post_def, read_string_aux_def] + \\ ntac 2 CASE_TAC + \\ read_string_aux_rec_tac)) + (* unrecognised escape *) + \\ xlet_autop \\ xraise + \\ read_string_aux_base_tac ‘k + k₁’) + (* simply push c and recurse *) + \\ xlet_autop + \\ gvs [] + \\ xapp + \\ simp [LIST_TYPE_def] + \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’] + \\ simp [read_string_aux_post_def, read_string_aux_def] + \\ ntac 2 CASE_TAC + \\ read_string_aux_rec_tac) QED - Definition lex_aux_post_def: lex_aux_post depth s acc fs is fd = (case lex_aux depth s acc of @@ -218,13 +235,14 @@ val MLSEXP_TOKEN_TYPE_def = theorem "MLSEXP_TOKEN_TYPE_def"; lex_aux_post ... fs is fd *+ GC This usually comes up as part of a recursive call. *) -val lex_aux_SEP_IMPPOST_tac = - (simp [lex_aux_post_def, lex_aux_def, isSpace_def] - \\ ntac 2 CASE_TAC - \\ simp [forwardFD_o] - \\ xsimpl \\ rpt strip_tac - \\ qmatch_goalsub_abbrev_tac ‘forwardFD fs fd kpx’ - \\ qexists ‘kpx’ \\ xsimpl); +(* TODO Move out ntac 2 ..., move in other things we repeat *) +(* val lex_aux_SEP_IMPPOST_tac = *) +(* (simp [lex_aux_post_def, lex_aux_def, isSpace_def] *) +(* \\ ntac 2 CASE_TAC *) +(* \\ simp [forwardFD_o] *) +(* \\ xsimpl \\ rpt strip_tac *) +(* \\ qmatch_goalsub_abbrev_tac ‘forwardFD fs fd kpx’ *) +(* \\ qexists ‘kpx’ \\ xsimpl); *) (* Finish up the goal when we are returning. Should be preceeded by an x-tactic (xraise, xcon, ...) *) @@ -240,66 +258,67 @@ Theorem lex_aux_spec: (STDIO fs * INSTREAM_STR fd is s fs) (lex_aux_post depth s acc fs is fd) Proof - Induct - \\ rpt strip_tac - \\ qmatch_goalsub_abbrev_tac ‘lex_aux_post _ s₁’ - \\ xcf "lex_aux" st - >- - (xlet ‘POSTv chv. SEP_EXISTS k. - STDIO (forwardFD fs fd k) * - INSTREAM_STR fd is (TL s₁) (forwardFD fs fd k) * - &OPTION_TYPE CHAR (oHD s₁) chv’ - >- (xapp_spec b_input1_spec_str) - \\ unabbrev_all_tac \\ gvs [OPTION_TYPE_def] - \\ xmatch - \\ xlet_autop - \\ xif - >- - (xvar - \\ simp [lex_aux_post_def, lex_aux_def] \\ xsimpl - >- (qexists ‘k’ \\ xsimpl)) - \\ xlet_autop - \\ xraise \\ lex_aux_ret_tac) - >- - (xlet ‘POSTv chv. SEP_EXISTS k. - STDIO (forwardFD fs fd k) * - INSTREAM_STR fd is (TL s₁) (forwardFD fs fd k) * - &OPTION_TYPE CHAR (oHD s₁) chv’ - >- (xapp_spec b_input1_spec_str) - \\ unabbrev_all_tac \\ gvs [OPTION_TYPE_def] - \\ xmatch - \\ xlet_autop - \\ xif >- - (last_x_assum $ drule_all_then assume_tac - \\ xapp - \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’] - \\ xpull >- xsimpl - \\ lex_aux_SEP_IMPPOST_tac) - \\ xlet_autop - \\ xif >- - (ntac 3 xlet_autop - \\ xapp - \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’, ‘depth + 1’, ‘OPEN::acc’] - \\ simp [] - \\ conj_tac >- (simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def]) - \\ xpull >- xsimpl - \\ lex_aux_SEP_IMPPOST_tac) - \\ xlet_autop - \\ xif >- - (xlet_autop - \\ xif >- (xlet_autop \\ xraise \\ lex_aux_ret_tac) - \\ xlet_autop - \\ xif >- (xlet_autop \\ xcon \\ lex_aux_ret_tac) - \\ ntac 3 xlet_autop - \\ xapp - \\ qexistsl - [‘emp’, ‘forwardFD fs fd k’, ‘fd’, ‘depth - 1’, ‘CLOSE::acc’] - \\ simp [] - \\ conj_tac >- (simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def]) - \\ xpull >- xsimpl - \\ lex_aux_SEP_IMPPOST_tac) - \\ xlet_autop - \\ xif >- - (cheat) - \\ cheat) + (* TODO induct over lex_aux_ind *) + (* Induct *) + (* \\ rpt strip_tac *) + (* \\ qmatch_goalsub_abbrev_tac ‘lex_aux_post _ s₁’ *) + (* \\ xcf "lex_aux" st *) + (* >- *) + (* (xlet ‘POSTv chv. SEP_EXISTS k. *) + (* STDIO (forwardFD fs fd k) * *) + (* INSTREAM_STR fd is (TL s₁) (forwardFD fs fd k) * *) + (* &OPTION_TYPE CHAR (oHD s₁) chv’ *) + (* >- (xapp_spec b_input1_spec_str) *) + (* \\ unabbrev_all_tac \\ gvs [OPTION_TYPE_def] *) + (* \\ xmatch *) + (* \\ xlet_autop *) + (* \\ xif *) + (* >- *) + (* (xvar *) + (* \\ simp [lex_aux_post_def, lex_aux_def] \\ xsimpl *) + (* >- (qexists ‘k’ \\ xsimpl)) *) + (* \\ xlet_autop *) + (* \\ xraise \\ lex_aux_ret_tac) *) + (* >- *) + (* (xlet ‘POSTv chv. SEP_EXISTS k. *) + (* STDIO (forwardFD fs fd k) * *) + (* INSTREAM_STR fd is (TL s₁) (forwardFD fs fd k) * *) + (* &OPTION_TYPE CHAR (oHD s₁) chv’ *) + (* >- (xapp_spec b_input1_spec_str) *) + (* \\ unabbrev_all_tac \\ gvs [OPTION_TYPE_def] *) + (* \\ xmatch *) + (* \\ xlet_autop *) + (* \\ xif >- *) + (* (last_x_assum $ drule_all_then assume_tac *) + (* \\ xapp *) + (* \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’] *) + (* \\ xpull >- xsimpl *) + (* \\ lex_aux_SEP_IMPPOST_tac) *) + (* \\ xlet_autop *) + (* \\ xif >- *) + (* (ntac 3 xlet_autop *) + (* \\ xapp *) + (* \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’, ‘depth + 1’, ‘OPEN::acc’] *) + (* \\ simp [] *) + (* \\ conj_tac >- (simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def]) *) + (* \\ xpull >- xsimpl *) + (* \\ lex_aux_SEP_IMPPOST_tac) *) + (* \\ xlet_autop *) + (* \\ xif >- *) + (* (xlet_autop *) + (* \\ xif >- (xlet_autop \\ xraise \\ lex_aux_ret_tac) *) + (* \\ xlet_autop *) + (* \\ xif >- (xlet_autop \\ xcon \\ lex_aux_ret_tac) *) + (* \\ ntac 3 xlet_autop *) + (* \\ xapp *) + (* \\ qexistsl *) + (* [‘emp’, ‘forwardFD fs fd k’, ‘fd’, ‘depth - 1’, ‘CLOSE::acc’] *) + (* \\ simp [] *) + (* \\ conj_tac >- (simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def]) *) + (* \\ xpull >- xsimpl *) + (* \\ lex_aux_SEP_IMPPOST_tac) *) + (* \\ xlet_autop *) + (* \\ xif >- *) + (* (cheat) *) + (* \\ cheat) *) QED From 44bbeea45e5988b697d4782b8584589a55e7d180 Mon Sep 17 00:00:00 2001 From: Daniel Nezamabadi <55559979+dnezam@users.noreply.github.com> Date: Sun, 7 Dec 2025 19:48:33 +0100 Subject: [PATCH 20/42] [mlsexp] Prove read_symbol_spec --- basis/SexpProgScript.sml | 133 +++++++++++++++++++++++++++++++++++---- 1 file changed, 122 insertions(+), 11 deletions(-) diff --git a/basis/SexpProgScript.sml b/basis/SexpProgScript.sml index 59972936cd..2ffe22bc51 100644 --- a/basis/SexpProgScript.sml +++ b/basis/SexpProgScript.sml @@ -51,16 +51,17 @@ End Quote add_cakeml: fun read_symbol_aux input acc = - case TextIO.b_input1 input of + case TextIO.b_peekChar input of None => String.implode (List.rev acc) | Some c => if c = #")" orelse Char.isSpace c then String.implode (List.rev acc) - else read_symbol_aux input (c::acc) + (* Consume c *) + else (TextIO.b_input1 input; read_symbol_aux input (c::acc)) End Quote add_cakeml: - fun read_symbol input = read_symbol input [] + fun read_symbol input = read_symbol_aux input [] End Quote add_cakeml: @@ -121,15 +122,15 @@ fun read_string_aux_base_tac k = (simp [read_string_aux_post_def, read_string_aux_def] \\ xsimpl \\ qexists k \\ xsimpl \\ simp [Fail_exn_def]); -(* Can be used in read_string_aux_spec to finish proofs about recursive cases. *) -val read_string_aux_rec_tac = - (conj_tac >- xsimpl - \\ xsimpl +(* Useful when finishing up recursive cases. Takes care of instantiating k in + * forward fs fd k. *) +val STDIO_forwardFD_INSTREAM_STR_tac = + (xsimpl \\ rpt strip_tac \\ simp [forwardFD_o] \\ qmatch_goalsub_abbrev_tac ‘forwardFD fs fd kpx’ \\ qexists ‘kpx’ \\ xsimpl); -Theorem read_string_aux_spec: +Theorem read_string_aux_spec[local]: ∀s acc accv p is fs fd. LIST_TYPE CHAR acc accv ⇒ app (p:'ffi ffi_proj) read_string_aux_v [is; accv] @@ -184,7 +185,7 @@ Proof \\ first_assum $ irule_at (Pos hd) \\ qexistsl [‘forwardFD fs fd (k + k₁)’, ‘fd’, ‘emp’] \\ simp [read_string_aux_post_def, read_string_aux_def] - \\ read_string_aux_rec_tac) + \\ STDIO_forwardFD_INSTREAM_STR_tac) \\ xmatch (* escape characters *) \\ ntac 5 ( @@ -198,7 +199,7 @@ Proof \\ qexistsl [‘emp’, ‘forwardFD fs fd (k + k₁)’, ‘fd’] \\ simp [read_string_aux_post_def, read_string_aux_def] \\ ntac 2 CASE_TAC - \\ read_string_aux_rec_tac)) + \\ STDIO_forwardFD_INSTREAM_STR_tac)) (* unrecognised escape *) \\ xlet_autop \\ xraise \\ read_string_aux_base_tac ‘k + k₁’) @@ -210,7 +211,116 @@ Proof \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’] \\ simp [read_string_aux_post_def, read_string_aux_def] \\ ntac 2 CASE_TAC - \\ read_string_aux_rec_tac) + \\ STDIO_forwardFD_INSTREAM_STR_tac) +QED + +Theorem read_string_spec: + app (p:'ffi ffi_proj) read_string_v [is] + (STDIO fs * INSTREAM_STR fd is s fs) + (case read_string s of + | INL (msg, rest) => + POSTe exn. SEP_EXISTS k. + INSTREAM_STR fd is rest (forwardFD fs fd k) * + &(Fail_exn msg exn) + | INR (slit, rest) => + POSTv slitv. SEP_EXISTS k. + STDIO (forwardFD fs fd k) * + INSTREAM_STR fd is rest (forwardFD fs fd k) * + &(STRING_TYPE slit slitv)) +Proof + xcf "read_string" st + \\ xlet_autop \\ xapp + \\ simp [read_string_aux_post_def, read_string_def] + \\ qexistsl [‘emp’, ‘s’, ‘fs’, ‘fd’, ‘[]’] + \\ simp [LIST_TYPE_def] \\ xsimpl +QED + +Theorem read_symbol_aux_spec[local]: + ∀s acc accv p is fs fd. + LIST_TYPE CHAR acc accv ⇒ + app (p:'ffi ffi_proj) read_symbol_aux_v [is; accv] + (STDIO fs * INSTREAM_STR fd is s fs) + (case read_symbol_aux s acc of + | (slit, rest) => + POSTv slitv. SEP_EXISTS k. + STDIO (forwardFD fs fd k) * + INSTREAM_STR fd is rest (forwardFD fs fd k) * + &(STRING_TYPE slit slitv)) +Proof + Induct + \\ rpt strip_tac + \\ qmatch_goalsub_abbrev_tac ‘read_symbol_aux s₁’ + \\ xcf "read_symbol_aux" st + >- (* [] *) + (xlet ‘POSTv chv. SEP_EXISTS k. + STDIO (forwardFD fs fd k) * + INSTREAM_STR fd is s₁ (forwardFD fs fd k) * + &(OPTION_TYPE CHAR (oHD s₁) chv)’ + >- (xapp_spec b_peekChar_spec_str) + \\ unabbrev_all_tac \\ gvs [OPTION_TYPE_def] + \\ xmatch \\ xlet_autop \\ xapp + \\ first_assum $ irule_at (Pos hd) + \\ simp [read_symbol_aux_def] + \\ xsimpl \\ rpt strip_tac + \\ qexists ‘k’ \\ xsimpl) + >- (* c::cs *) + (xlet ‘POSTv chv. SEP_EXISTS k. + STDIO (forwardFD fs fd k) * + INSTREAM_STR fd is s₁ (forwardFD fs fd k) * + &(OPTION_TYPE CHAR (oHD s₁) chv)’ + >- (xapp_spec b_peekChar_spec_str) + \\ unabbrev_all_tac \\ gvs [OPTION_TYPE_def] + \\ xmatch \\ xlet_autop + \\ rename [‘read_symbol_aux (h::s)’] + \\ xlet ‘POSTv b. + STDIO (forwardFD fs fd k) * + INSTREAM_STR fd is (h::s) (forwardFD fs fd k) * + &BOOL (h = #")" ∨ isSpace h) b’ + >- + (xlog + \\ IF_CASES_TAC >- (xsimpl \\ gvs []) + \\ xapp + \\ first_assum $ irule_at (Pos hd) + \\ xsimpl) + \\ simp [read_symbol_aux_def] + \\ xif + >- + (xlet_autop \\ xapp + \\ first_assum $ irule_at (Pos hd) + \\ xsimpl \\ rpt strip_tac \\ qexists ‘k’ \\ xsimpl) + \\ xlet ‘POSTv chv. SEP_EXISTS k₁. + STDIO (forwardFD fs fd (k + k₁)) * + INSTREAM_STR fd is s (forwardFD fs fd (k + k₁)) * + &OPTION_TYPE CHAR (SOME h) chv’ + >- + (xapp_spec b_input1_spec_str + \\ qexistsl [‘emp’, ‘h::s’, ‘forwardFD fs fd k’, ‘fd’] + \\ xsimpl \\ rpt strip_tac \\ simp [forwardFD_o] + \\ qmatch_goalsub_abbrev_tac ‘forwardFD _ _ (_ + k₁)’ + \\ qexists ‘k₁’ \\ xsimpl) + \\ xlet_autop + \\ xapp + \\ qexistsl [‘emp’, ‘forwardFD fs fd (k + k₁)’, ‘fd’, ‘h::acc’] + \\ simp [LIST_TYPE_def] + \\ CASE_TAC + \\ STDIO_forwardFD_INSTREAM_STR_tac) +QED + +Theorem read_symbol_spec: + app (p:'ffi ffi_proj) read_symbol_v [is] + (STDIO fs * INSTREAM_STR fd is s fs) + (case read_symbol s of + | (slit, rest) => + POSTv slitv. SEP_EXISTS k. + STDIO (forwardFD fs fd k) * + INSTREAM_STR fd is rest (forwardFD fs fd k) * + &(STRING_TYPE slit slitv)) +Proof + xcf "read_symbol" st + \\ xlet_autop \\ xapp + \\ simp [read_symbol_def] + \\ qexistsl [‘emp’, ‘s’, ‘fs’, ‘fd’, ‘[]’] + \\ simp [LIST_TYPE_def] \\ xsimpl QED Definition lex_aux_post_def: @@ -258,6 +368,7 @@ Theorem lex_aux_spec: (STDIO fs * INSTREAM_STR fd is s fs) (lex_aux_post depth s acc fs is fd) Proof + cheat (* TODO induct over lex_aux_ind *) (* Induct *) (* \\ rpt strip_tac *) From dc5087add32f179e2681abc52a3bda461b0921d7 Mon Sep 17 00:00:00 2001 From: Daniel Nezamabadi <55559979+dnezam@users.noreply.github.com> Date: Sun, 7 Dec 2025 22:19:56 +0100 Subject: [PATCH 21/42] [mlsexp] Make progress in lex_aux_spec --- basis/SexpProgScript.sml | 191 ++++++++++++++++++++++----------------- 1 file changed, 107 insertions(+), 84 deletions(-) diff --git a/basis/SexpProgScript.sml b/basis/SexpProgScript.sml index 2ffe22bc51..55b8476426 100644 --- a/basis/SexpProgScript.sml +++ b/basis/SexpProgScript.sml @@ -339,97 +339,120 @@ End val MLSEXP_TOKEN_TYPE_def = theorem "MLSEXP_TOKEN_TYPE_def"; -(* Solve goals of the form - - lex_aux_post ... (forwardFD fs fd k) is fd *+ emp ==+> - lex_aux_post ... fs is fd *+ GC - - This usually comes up as part of a recursive call. *) -(* TODO Move out ntac 2 ..., move in other things we repeat *) -(* val lex_aux_SEP_IMPPOST_tac = *) -(* (simp [lex_aux_post_def, lex_aux_def, isSpace_def] *) -(* \\ ntac 2 CASE_TAC *) -(* \\ simp [forwardFD_o] *) -(* \\ xsimpl \\ rpt strip_tac *) -(* \\ qmatch_goalsub_abbrev_tac ‘forwardFD fs fd kpx’ *) -(* \\ qexists ‘kpx’ \\ xsimpl); *) - -(* Finish up the goal when we are returning. - Should be preceeded by an x-tactic (xraise, xcon, ...) *) -val lex_aux_ret_tac = +(* Analogous to read_string_aux_base_tac *) +fun lex_aux_base_tac k = (simp [lex_aux_post_def, lex_aux_def] \\ xsimpl - \\ qexists ‘k’ \\ xsimpl + \\ qexists k \\ xsimpl \\ simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def, Fail_exn_def]); Theorem lex_aux_spec: - ∀s depth depthv acc accv p is fs fd. + ∀ depth s acc depthv accv p is fs fd. NUM depth depthv ∧ LIST_TYPE MLSEXP_TOKEN_TYPE acc accv ⇒ app (p:'ffi ffi_proj) lex_aux_v [depthv; is; accv] (STDIO fs * INSTREAM_STR fd is s fs) (lex_aux_post depth s acc fs is fd) Proof - cheat - (* TODO induct over lex_aux_ind *) - (* Induct *) - (* \\ rpt strip_tac *) - (* \\ qmatch_goalsub_abbrev_tac ‘lex_aux_post _ s₁’ *) - (* \\ xcf "lex_aux" st *) - (* >- *) - (* (xlet ‘POSTv chv. SEP_EXISTS k. *) - (* STDIO (forwardFD fs fd k) * *) - (* INSTREAM_STR fd is (TL s₁) (forwardFD fs fd k) * *) - (* &OPTION_TYPE CHAR (oHD s₁) chv’ *) - (* >- (xapp_spec b_input1_spec_str) *) - (* \\ unabbrev_all_tac \\ gvs [OPTION_TYPE_def] *) - (* \\ xmatch *) - (* \\ xlet_autop *) - (* \\ xif *) - (* >- *) - (* (xvar *) - (* \\ simp [lex_aux_post_def, lex_aux_def] \\ xsimpl *) - (* >- (qexists ‘k’ \\ xsimpl)) *) - (* \\ xlet_autop *) - (* \\ xraise \\ lex_aux_ret_tac) *) - (* >- *) - (* (xlet ‘POSTv chv. SEP_EXISTS k. *) - (* STDIO (forwardFD fs fd k) * *) - (* INSTREAM_STR fd is (TL s₁) (forwardFD fs fd k) * *) - (* &OPTION_TYPE CHAR (oHD s₁) chv’ *) - (* >- (xapp_spec b_input1_spec_str) *) - (* \\ unabbrev_all_tac \\ gvs [OPTION_TYPE_def] *) - (* \\ xmatch *) - (* \\ xlet_autop *) - (* \\ xif >- *) - (* (last_x_assum $ drule_all_then assume_tac *) - (* \\ xapp *) - (* \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’] *) - (* \\ xpull >- xsimpl *) - (* \\ lex_aux_SEP_IMPPOST_tac) *) - (* \\ xlet_autop *) - (* \\ xif >- *) - (* (ntac 3 xlet_autop *) - (* \\ xapp *) - (* \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’, ‘depth + 1’, ‘OPEN::acc’] *) - (* \\ simp [] *) - (* \\ conj_tac >- (simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def]) *) - (* \\ xpull >- xsimpl *) - (* \\ lex_aux_SEP_IMPPOST_tac) *) - (* \\ xlet_autop *) - (* \\ xif >- *) - (* (xlet_autop *) - (* \\ xif >- (xlet_autop \\ xraise \\ lex_aux_ret_tac) *) - (* \\ xlet_autop *) - (* \\ xif >- (xlet_autop \\ xcon \\ lex_aux_ret_tac) *) - (* \\ ntac 3 xlet_autop *) - (* \\ xapp *) - (* \\ qexistsl *) - (* [‘emp’, ‘forwardFD fs fd k’, ‘fd’, ‘depth - 1’, ‘CLOSE::acc’] *) - (* \\ simp [] *) - (* \\ conj_tac >- (simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def]) *) - (* \\ xpull >- xsimpl *) - (* \\ lex_aux_SEP_IMPPOST_tac) *) - (* \\ xlet_autop *) - (* \\ xif >- *) - (* (cheat) *) - (* \\ cheat) *) + ho_match_mp_tac lex_aux_ind + \\ rpt strip_tac + \\ qmatch_goalsub_abbrev_tac ‘lex_aux_post _ s₁’ + \\ xcf "lex_aux" st + >- + (xlet ‘POSTv chv. SEP_EXISTS k. + STDIO (forwardFD fs fd k) * + INSTREAM_STR fd is (TL s₁) (forwardFD fs fd k) * + &OPTION_TYPE CHAR (oHD s₁) chv’ + >- (xapp_spec b_input1_spec_str) + \\ unabbrev_all_tac \\ gvs [OPTION_TYPE_def] + \\ xmatch \\ xlet_autop + \\ xif + >- (xvar \\ lex_aux_base_tac ‘k’) + \\ xlet_autop + \\ xraise \\ lex_aux_base_tac ‘k’) + + >- + (xlet ‘POSTv chv. SEP_EXISTS k. + STDIO (forwardFD fs fd k) * + INSTREAM_STR fd is (TL s₁) (forwardFD fs fd k) * + &OPTION_TYPE CHAR (oHD s₁) chv’ + >- (xapp_spec b_input1_spec_str) + \\ unabbrev_all_tac \\ gvs [OPTION_TYPE_def] + \\ rename [‘lex_aux_post _ (STRING c s)’] + \\ xmatch + \\ xlet_autop + \\ xif >- + (last_x_assum $ drule_all_then assume_tac + \\ xapp + \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’] + \\ simp [lex_aux_post_def, lex_aux_def] + \\ ntac 2 CASE_TAC + \\ STDIO_forwardFD_INSTREAM_STR_tac) + \\ xlet_autop + \\ xif >- + (ntac 3 xlet_autop + \\ xapp + \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’] + \\ gvs [lex_aux_post_def, lex_aux_def] + \\ conj_tac >- (simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def]) + \\ ntac 2 CASE_TAC + \\ STDIO_forwardFD_INSTREAM_STR_tac) + \\ xlet_autop + \\ xif >- + (xlet_autop + \\ xif >- (xlet_autop \\ xraise \\ lex_aux_base_tac ‘k’) + \\ xlet_autop + \\ xif >- (xlet_autop \\ xcon \\ lex_aux_base_tac ‘k’) + \\ ntac 3 xlet_autop + \\ xapp + \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’] + \\ gvs [lex_aux_post_def, lex_aux_def] + \\ conj_tac >- (simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def]) + \\ ntac 2 CASE_TAC + \\ STDIO_forwardFD_INSTREAM_STR_tac) + \\ xlet_autop + \\ xif >- + (simp [lex_aux_post_def, lex_aux_def, isSpace_def] + \\ namedCases_on ‘mlsexp$read_string s’ ["l", "r"] + >- + (namedCases_on ‘l’ ["msg rest"] + \\ xlet ‘POSTe exn. SEP_EXISTS k. + INSTREAM_STR fd is rest (forwardFD fs fd k) * + &(Fail_exn msg exn)’ + >- + (xapp + \\ qexistsl [‘emp’, ‘s’, ‘forwardFD fs fd k’, ‘fd’] + \\ simp [] + \\ STDIO_forwardFD_INSTREAM_STR_tac) + \\ simp [] \\ xsimpl) + \\ namedCases_on ‘r’ ["slit rest"] + \\ xlet ‘POSTv slitv. SEP_EXISTS k. + STDIO (forwardFD fs fd k) * + INSTREAM_STR fd is rest (forwardFD fs fd k) * + &(STRING_TYPE slit slitv)’ + >- + (xapp + \\ qexistsl [‘emp’, ‘s’, ‘forwardFD fs fd k’, ‘fd’] + \\ simp [] + \\ STDIO_forwardFD_INSTREAM_STR_tac) + \\ xlet_autop + \\ xif + >- + (ntac 2 xlet_autop \\ xcon + \\ simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def] + \\ STDIO_forwardFD_INSTREAM_STR_tac) + \\ ntac 2 xlet_autop \\ xapp + \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’] + \\ simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def, lex_aux_post_def] + \\ ntac 2 CASE_TAC + \\ STDIO_forwardFD_INSTREAM_STR_tac) + \\ simp [lex_aux_post_def, lex_aux_def] + \\ namedCases_on ‘read_symbol (STRING c s)’ ["sym rest"] + \\ xlet ‘POSTv slitv. SEP_EXISTS k. + STDIO (forwardFD fs fd k) * + INSTREAM_STR fd is rest (forwardFD fs fd k) * + &(STRING_TYPE slit slitv)’ + >- (xapp \\ qexistsl [‘emp’, ‘c::s’, ‘forwardFD fs fd k’, ‘fd’] + \\ simp [] + \\ xsimpl + \\ cheat (* TODO unprovable; need to use peek in lex_aux *) ) + \\ cheat) QED From 90c4b2d9cba8b01fc229efd14359441ecaa2f6d5 Mon Sep 17 00:00:00 2001 From: Daniel Nezamabadi <55559979+dnezam@users.noreply.github.com> Date: Mon, 8 Dec 2025 15:12:52 +0100 Subject: [PATCH 22/42] [mlsexp] Prove lex_aux_spec --- basis/SexpProgScript.sml | 200 +++++++++++++++++------------------- basis/pure/mlsexpScript.sml | 44 ++++---- 2 files changed, 119 insertions(+), 125 deletions(-) diff --git a/basis/SexpProgScript.sml b/basis/SexpProgScript.sml index 55b8476426..6cfe4fce61 100644 --- a/basis/SexpProgScript.sml +++ b/basis/SexpProgScript.sml @@ -50,18 +50,15 @@ Quote add_cakeml: End Quote add_cakeml: - fun read_symbol_aux input acc = + fun read_symbol input acc = case TextIO.b_peekChar input of None => String.implode (List.rev acc) | Some c => if c = #")" orelse Char.isSpace c then String.implode (List.rev acc) - (* Consume c *) - else (TextIO.b_input1 input; read_symbol_aux input (c::acc)) -End - -Quote add_cakeml: - fun read_symbol input = read_symbol_aux input [] + else ( + TextIO.b_input1 input; (* Consume c *) + read_symbol input (c::acc)) End Quote add_cakeml: @@ -81,7 +78,7 @@ Quote add_cakeml: if depth = 0 then [Symbol s] else lex_aux depth input ((Symbol s)::acc) end else - let val s = read_symbol input in + let val s = read_symbol input [c] in if depth = 0 then [Symbol s] else lex_aux depth input ((Symbol s)::acc) end End @@ -235,12 +232,12 @@ Proof \\ simp [LIST_TYPE_def] \\ xsimpl QED -Theorem read_symbol_aux_spec[local]: +Theorem read_symbol_spec[local]: ∀s acc accv p is fs fd. LIST_TYPE CHAR acc accv ⇒ - app (p:'ffi ffi_proj) read_symbol_aux_v [is; accv] + app (p:'ffi ffi_proj) read_symbol_v [is; accv] (STDIO fs * INSTREAM_STR fd is s fs) - (case read_symbol_aux s acc of + (case read_symbol s acc of | (slit, rest) => POSTv slitv. SEP_EXISTS k. STDIO (forwardFD fs fd k) * @@ -249,8 +246,8 @@ Theorem read_symbol_aux_spec[local]: Proof Induct \\ rpt strip_tac - \\ qmatch_goalsub_abbrev_tac ‘read_symbol_aux s₁’ - \\ xcf "read_symbol_aux" st + \\ qmatch_goalsub_abbrev_tac ‘read_symbol s₁’ + \\ xcf "read_symbol" st >- (* [] *) (xlet ‘POSTv chv. SEP_EXISTS k. STDIO (forwardFD fs fd k) * @@ -260,7 +257,7 @@ Proof \\ unabbrev_all_tac \\ gvs [OPTION_TYPE_def] \\ xmatch \\ xlet_autop \\ xapp \\ first_assum $ irule_at (Pos hd) - \\ simp [read_symbol_aux_def] + \\ simp [read_symbol_def] \\ xsimpl \\ rpt strip_tac \\ qexists ‘k’ \\ xsimpl) >- (* c::cs *) @@ -271,7 +268,7 @@ Proof >- (xapp_spec b_peekChar_spec_str) \\ unabbrev_all_tac \\ gvs [OPTION_TYPE_def] \\ xmatch \\ xlet_autop - \\ rename [‘read_symbol_aux (h::s)’] + \\ rename [‘read_symbol (h::s)’] \\ xlet ‘POSTv b. STDIO (forwardFD fs fd k) * INSTREAM_STR fd is (h::s) (forwardFD fs fd k) * @@ -282,7 +279,7 @@ Proof \\ xapp \\ first_assum $ irule_at (Pos hd) \\ xsimpl) - \\ simp [read_symbol_aux_def] + \\ simp [read_symbol_def] \\ xif >- (xlet_autop \\ xapp @@ -306,23 +303,6 @@ Proof \\ STDIO_forwardFD_INSTREAM_STR_tac) QED -Theorem read_symbol_spec: - app (p:'ffi ffi_proj) read_symbol_v [is] - (STDIO fs * INSTREAM_STR fd is s fs) - (case read_symbol s of - | (slit, rest) => - POSTv slitv. SEP_EXISTS k. - STDIO (forwardFD fs fd k) * - INSTREAM_STR fd is rest (forwardFD fs fd k) * - &(STRING_TYPE slit slitv)) -Proof - xcf "read_symbol" st - \\ xlet_autop \\ xapp - \\ simp [read_symbol_def] - \\ qexistsl [‘emp’, ‘s’, ‘fs’, ‘fd’, ‘[]’] - \\ simp [LIST_TYPE_def] \\ xsimpl -QED - Definition lex_aux_post_def: lex_aux_post depth s acc fs is fd = (case lex_aux depth s acc of @@ -356,6 +336,7 @@ Proof \\ rpt strip_tac \\ qmatch_goalsub_abbrev_tac ‘lex_aux_post _ s₁’ \\ xcf "lex_aux" st + (* [] *) >- (xlet ‘POSTv chv. SEP_EXISTS k. STDIO (forwardFD fs fd k) * @@ -368,91 +349,98 @@ Proof >- (xvar \\ lex_aux_base_tac ‘k’) \\ xlet_autop \\ xraise \\ lex_aux_base_tac ‘k’) - - >- - (xlet ‘POSTv chv. SEP_EXISTS k. + (* c::cs *) + \\ xlet ‘POSTv chv. SEP_EXISTS k. STDIO (forwardFD fs fd k) * INSTREAM_STR fd is (TL s₁) (forwardFD fs fd k) * &OPTION_TYPE CHAR (oHD s₁) chv’ - >- (xapp_spec b_input1_spec_str) - \\ unabbrev_all_tac \\ gvs [OPTION_TYPE_def] - \\ rename [‘lex_aux_post _ (STRING c s)’] - \\ xmatch - \\ xlet_autop - \\ xif >- - (last_x_assum $ drule_all_then assume_tac - \\ xapp - \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’] - \\ simp [lex_aux_post_def, lex_aux_def] - \\ ntac 2 CASE_TAC - \\ STDIO_forwardFD_INSTREAM_STR_tac) - \\ xlet_autop - \\ xif >- - (ntac 3 xlet_autop - \\ xapp - \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’] - \\ gvs [lex_aux_post_def, lex_aux_def] - \\ conj_tac >- (simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def]) - \\ ntac 2 CASE_TAC - \\ STDIO_forwardFD_INSTREAM_STR_tac) - \\ xlet_autop - \\ xif >- - (xlet_autop - \\ xif >- (xlet_autop \\ xraise \\ lex_aux_base_tac ‘k’) - \\ xlet_autop - \\ xif >- (xlet_autop \\ xcon \\ lex_aux_base_tac ‘k’) - \\ ntac 3 xlet_autop - \\ xapp - \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’] - \\ gvs [lex_aux_post_def, lex_aux_def] - \\ conj_tac >- (simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def]) - \\ ntac 2 CASE_TAC - \\ STDIO_forwardFD_INSTREAM_STR_tac) + >- (xapp_spec b_input1_spec_str) + \\ unabbrev_all_tac \\ gvs [OPTION_TYPE_def] + \\ rename [‘lex_aux_post _ (STRING c s)’] + \\ xmatch + \\ xlet_autop + \\ xif >- + (last_x_assum $ drule_all_then assume_tac + \\ xapp + \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’] + \\ simp [lex_aux_post_def, lex_aux_def] + \\ ntac 2 CASE_TAC + \\ STDIO_forwardFD_INSTREAM_STR_tac) + \\ xlet_autop + \\ xif >- + (ntac 3 xlet_autop + \\ xapp + \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’] + \\ gvs [lex_aux_post_def, lex_aux_def] + \\ conj_tac >- (simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def]) + \\ ntac 2 CASE_TAC + \\ STDIO_forwardFD_INSTREAM_STR_tac) + \\ xlet_autop + \\ xif >- + (xlet_autop + \\ xif >- (xlet_autop \\ xraise \\ lex_aux_base_tac ‘k’) \\ xlet_autop - \\ xif >- - (simp [lex_aux_post_def, lex_aux_def, isSpace_def] - \\ namedCases_on ‘mlsexp$read_string s’ ["l", "r"] - >- - (namedCases_on ‘l’ ["msg rest"] - \\ xlet ‘POSTe exn. SEP_EXISTS k. - INSTREAM_STR fd is rest (forwardFD fs fd k) * - &(Fail_exn msg exn)’ - >- - (xapp - \\ qexistsl [‘emp’, ‘s’, ‘forwardFD fs fd k’, ‘fd’] - \\ simp [] - \\ STDIO_forwardFD_INSTREAM_STR_tac) - \\ simp [] \\ xsimpl) - \\ namedCases_on ‘r’ ["slit rest"] - \\ xlet ‘POSTv slitv. SEP_EXISTS k. - STDIO (forwardFD fs fd k) * + \\ xif >- (xlet_autop \\ xcon \\ lex_aux_base_tac ‘k’) + \\ ntac 3 xlet_autop + \\ xapp + \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’] + \\ gvs [lex_aux_post_def, lex_aux_def] + \\ conj_tac >- (simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def]) + \\ ntac 2 CASE_TAC + \\ STDIO_forwardFD_INSTREAM_STR_tac) + \\ xlet_autop + \\ xif >- + (simp [lex_aux_post_def, lex_aux_def, isSpace_def] + \\ namedCases_on ‘mlsexp$read_string s’ ["l", "r"] + >- + (namedCases_on ‘l’ ["msg rest"] + \\ xlet ‘POSTe exn. SEP_EXISTS k. INSTREAM_STR fd is rest (forwardFD fs fd k) * - &(STRING_TYPE slit slitv)’ + &(Fail_exn msg exn)’ >- (xapp \\ qexistsl [‘emp’, ‘s’, ‘forwardFD fs fd k’, ‘fd’] \\ simp [] \\ STDIO_forwardFD_INSTREAM_STR_tac) - \\ xlet_autop - \\ xif - >- - (ntac 2 xlet_autop \\ xcon - \\ simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def] - \\ STDIO_forwardFD_INSTREAM_STR_tac) - \\ ntac 2 xlet_autop \\ xapp - \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’] - \\ simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def, lex_aux_post_def] - \\ ntac 2 CASE_TAC - \\ STDIO_forwardFD_INSTREAM_STR_tac) - \\ simp [lex_aux_post_def, lex_aux_def] - \\ namedCases_on ‘read_symbol (STRING c s)’ ["sym rest"] + \\ simp [] \\ xsimpl) + \\ namedCases_on ‘r’ ["slit rest"] \\ xlet ‘POSTv slitv. SEP_EXISTS k. STDIO (forwardFD fs fd k) * INSTREAM_STR fd is rest (forwardFD fs fd k) * &(STRING_TYPE slit slitv)’ - >- (xapp \\ qexistsl [‘emp’, ‘c::s’, ‘forwardFD fs fd k’, ‘fd’] - \\ simp [] - \\ xsimpl - \\ cheat (* TODO unprovable; need to use peek in lex_aux *) ) - \\ cheat) + >- + (xapp + \\ qexistsl [‘emp’, ‘s’, ‘forwardFD fs fd k’, ‘fd’] + \\ simp [] + \\ STDIO_forwardFD_INSTREAM_STR_tac) + \\ xlet_autop + \\ xif + >- + (ntac 2 xlet_autop \\ xcon + \\ simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def] + \\ lex_aux_base_tac ‘k’) + \\ ntac 2 xlet_autop \\ xapp + \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’] + \\ simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def, lex_aux_post_def] + \\ ntac 2 CASE_TAC + \\ STDIO_forwardFD_INSTREAM_STR_tac) + \\ simp [lex_aux_post_def, lex_aux_def] + \\ ntac 2 xlet_autop + \\ namedCases_on ‘read_symbol s [c]’ ["sym rest"] + \\ xlet ‘POSTv symv. SEP_EXISTS k. + STDIO (forwardFD fs fd k) * + INSTREAM_STR fd is rest (forwardFD fs fd k) * + &(STRING_TYPE sym symv)’ + >- + (xapp + \\ qexistsl [‘emp’, ‘s’, ‘forwardFD fs fd k’, ‘fd’, ‘[c]’] + \\ simp [LIST_TYPE_def] + \\ STDIO_forwardFD_INSTREAM_STR_tac) + \\ xlet_autop + \\ xif >- (ntac 2 xlet_autop \\ xcon \\ lex_aux_base_tac ‘k’) + \\ ntac 2 xlet_autop \\ xapp + \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’] + \\ simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def, lex_aux_post_def] + \\ ntac 2 CASE_TAC + \\ STDIO_forwardFD_INSTREAM_STR_tac QED diff --git a/basis/pure/mlsexpScript.sml b/basis/pure/mlsexpScript.sml index 25b10e8199..d3111b5782 100644 --- a/basis/pure/mlsexpScript.sml +++ b/basis/pure/mlsexpScript.sml @@ -72,28 +72,26 @@ Proof rw[read_string_def] \\ imp_res_tac read_string_aux_length QED -Definition read_symbol_aux_def: - read_symbol_aux [] acc = - (implode (REVERSE acc), []) ∧ - read_symbol_aux (c::cs) acc = - if c = #")" ∨ isSpace c then (implode (REVERSE acc), (c::cs)) - else read_symbol_aux cs (c::acc) -End - (* Returns the string until a closing parenthesis or whitespace, and the - rest of the input. *) + rest of the input. + + See usage in lex_aux as to why a "non-aux function" is exposing an + accumulator. *) Definition read_symbol_def: - read_symbol (input: string) : (mlstring # string) = - read_symbol_aux input [] + read_symbol [] acc = + (implode (REVERSE acc), []) ∧ + read_symbol (c::cs) acc = + if c = #")" ∨ isSpace c then (implode (REVERSE acc), (c::cs)) + else read_symbol cs (c::acc) End -Theorem read_symbol_aux_length: +Theorem read_symbol_length: ∀input acc. - read_symbol_aux input acc = (s, rest) ⇒ LENGTH rest ≤ LENGTH input + read_symbol input acc = (s, rest) ⇒ LENGTH rest ≤ LENGTH input Proof Induct - \\ simp[read_symbol_aux_def] - \\ rw[read_symbol_aux_def] \\ res_tac \\ gvs[] + \\ simp[read_symbol_def] + \\ rw[read_symbol_def] \\ res_tac \\ gvs[] QED (* By tracking the depth, we can ensure we only lex one S-expression at a @@ -116,17 +114,25 @@ Definition lex_aux_def: if depth = 0 then INR ([SYMBOL s], rest) else lex_aux depth rest ((SYMBOL s)::acc) else - case read_symbol (c::cs) of + (* We know that c is not a closing parenthesis or a space, so read_symbol + (c::cs) [] is equivalent to read_symbol cs [c]. + + The choice is an implementation detail relevant in the context of + streams: If we interpret the input string as a stream and use the + latter version of the call, the case split can be seen as consuming the + first character. If we insist on the former version, the case split + can only peek at (not consume) c, and we must add calls to consume c in + the other branches. *) + case read_symbol cs [c] of | (s, rest) => if depth = 0 then INR ([SYMBOL s], rest) else lex_aux depth rest ((SYMBOL s)::acc) Termination wf_rel_tac ‘measure $ (λ(_, input, _). LENGTH input)’ \\ rw[] \\ imp_res_tac read_string_length \\ fs[] - \\ fs[read_symbol_def] - \\ fs[Once read_symbol_aux_def] + \\ fs[Once read_symbol_def] \\ gvs [AllCaseEqs()] - \\ imp_res_tac read_symbol_aux_length \\ fs[] + \\ imp_res_tac read_symbol_length \\ fs[] End (* Tokenizes (at most) one S-expression, and returns the rest of the input. From a396213473d36f5296abb675a127c9cb83fa9bb8 Mon Sep 17 00:00:00 2001 From: Daniel Nezamabadi <55559979+dnezam@users.noreply.github.com> Date: Mon, 8 Dec 2025 16:06:17 +0100 Subject: [PATCH 23/42] [mlsexp] Prove parse_spec --- basis/SexpProgScript.sml | 70 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/basis/SexpProgScript.sml b/basis/SexpProgScript.sml index 6cfe4fce61..a1176fbef6 100644 --- a/basis/SexpProgScript.sml +++ b/basis/SexpProgScript.sml @@ -90,7 +90,10 @@ End val r = translate mlsexpTheory.parse_aux_def; Quote add_cakeml: - fun parse input = parse_aux (lex input) [] [] + fun parse input = + case parse_aux (lex input) [] [] of + [] => raise Fail "parse: empty input" + | (v::_) => v End (*--------------------------------------------------------------* @@ -444,3 +447,68 @@ Proof \\ ntac 2 CASE_TAC \\ STDIO_forwardFD_INSTREAM_STR_tac QED + +Theorem lex_spec: + app (p:'ffi ffi_proj) lex_v [is] + (STDIO fs * INSTREAM_STR fd is s fs) + (case lex s of + | INL (msg, rest) => + POSTe exn. SEP_EXISTS k. + INSTREAM_STR fd is rest (forwardFD fs fd k) * + &(Fail_exn msg exn) + | INR (toks, rest) => + POSTv tokvs. SEP_EXISTS k. + STDIO (forwardFD fs fd k) * + INSTREAM_STR fd is rest (forwardFD fs fd k) * + &(LIST_TYPE MLSEXP_TOKEN_TYPE toks tokvs)) +Proof + xcf "lex" st + \\ xlet_autop \\ xapp + \\ simp [lex_aux_post_def, lex_def] + \\ qexistsl [‘emp’, ‘s’, ‘fs’, ‘fd’, ‘[]’] + \\ simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def] \\ xsimpl +QED + +Theorem parse_spec: + app (p:'ffi ffi_proj) parse_v [is] + (STDIO fs * INSTREAM_STR fd is s fs) + (case parse s of + | INL (msg, rest) => + POSTe exn. SEP_EXISTS k. + INSTREAM_STR fd is rest (forwardFD fs fd k) * + &(Fail_exn msg exn) + | INR (se, rest) => + POSTv sev. SEP_EXISTS k. + STDIO (forwardFD fs fd k) * + INSTREAM_STR fd is rest (forwardFD fs fd k) * + &(MLSEXP_SEXP_TYPE se sev)) +Proof + xcf "parse" st + \\ ntac 2 xlet_autop + \\ simp [parse_def] + \\ namedCases_on ‘lex s’ ["l", "r"] + >- + (namedCases_on ‘l’ ["msg rest"] + \\ xlet ‘POSTe exn. SEP_EXISTS k. + INSTREAM_STR fd is rest (forwardFD fs fd k) * + &(Fail_exn msg exn)’ + >- (xapp \\ qexistsl [‘emp’, ‘s’, ‘fs’, ‘fd’] \\ simp [] \\ xsimpl) + \\ simp [] \\ xsimpl) + \\ namedCases_on ‘r’ ["toks rest"] + \\ xlet ‘POSTv tokvs. SEP_EXISTS k. + STDIO (forwardFD fs fd k) * + INSTREAM_STR fd is rest (forwardFD fs fd k) * + &(LIST_TYPE MLSEXP_TOKEN_TYPE toks tokvs)’ + >- (xapp \\ qexistsl [‘emp’, ‘s’, ‘fs’, ‘fd’] \\ simp [] \\ xsimpl) + \\ xlet ‘POSTv ses. + STDIO (forwardFD fs fd k) * + INSTREAM_STR fd is rest (forwardFD fs fd k) * + &(LIST_TYPE MLSEXP_SEXP_TYPE (parse_aux toks [] []) ses)’ + >- (xapp \\ xsimpl \\ qexistsl [‘[]’, ‘[]’, ‘toks’] \\ simp [LIST_TYPE_def]) + \\ namedCases_on ‘parse_aux toks [] []’ ["", "se ses"] + \\ gvs [LIST_TYPE_def] + \\ xmatch + >- (xlet_autop \\ xraise \\ xsimpl \\ qexists ‘k’ \\ xsimpl \\ simp [Fail_exn_def]) + \\ xvar + \\ xsimpl \\ qexists ‘k’ \\ xsimpl +QED From 2b073b8e5a102b6c91aaa10bbdb6211371e45d91 Mon Sep 17 00:00:00 2001 From: Daniel Nezamabadi <55559979+dnezam@users.noreply.github.com> Date: Tue, 9 Dec 2025 18:35:06 +0100 Subject: [PATCH 24/42] Use basis instead of dafny_sexp --- basis/SexpProgScript.sml | 35 ++-- basis/basisProgScript.sml | 5 +- basis/dependency-order | 1 + compiler/dafny/README.md | 3 - compiler/dafny/dafny_compilerScript.sml | 25 +-- compiler/dafny/dafny_sexpScript.sml | 152 ------------------ compiler/dafny/dafny_to_cakemlScript.sml | 1 - compiler/dafny/sexp_to_dafnyScript.sml | 2 +- compiler/dafny/translation/README.md | 3 - .../translation/dafny_compilerProgScript.sml | 5 +- .../translation/dafny_sexpProgScript.sml | 30 ---- .../translation/sexp_to_dafnyProgScript.sml | 5 +- 12 files changed, 37 insertions(+), 230 deletions(-) delete mode 100644 compiler/dafny/dafny_sexpScript.sml delete mode 100644 compiler/dafny/translation/dafny_sexpProgScript.sml diff --git a/basis/SexpProgScript.sml b/basis/SexpProgScript.sml index a1176fbef6..f9c93ba61f 100644 --- a/basis/SexpProgScript.sml +++ b/basis/SexpProgScript.sml @@ -13,6 +13,7 @@ Ancestors Libs preamble ml_translatorLib (* translation_extends, register_type, .. *) + ml_progLib (* open_module, open_local_block, .. *) basisFunctionsLib (* add_cakeml *) cfLib (* x-tactics *) @@ -22,7 +23,12 @@ Libs val _ = translation_extends "TextIOProg"; +val _ = ml_prog_update (open_module "Sexp"); + val _ = register_type “:mlsexp$sexp”; + +val _ = ml_prog_update open_local_block; + val _ = register_type “:mlsexp$token”; Quote add_cakeml: @@ -89,6 +95,8 @@ End val r = translate mlsexpTheory.parse_aux_def; +val _ = ml_prog_update open_local_in_block; + Quote add_cakeml: fun parse input = case parse_aux (lex input) [] [] of @@ -96,6 +104,9 @@ Quote add_cakeml: | (v::_) => v End +val _ = ml_prog_update close_local_blocks; +val _ = ml_prog_update (close_module NONE); + (*--------------------------------------------------------------* Prove equivalence to mlsexp *--------------------------------------------------------------*) @@ -133,14 +144,14 @@ val STDIO_forwardFD_INSTREAM_STR_tac = Theorem read_string_aux_spec[local]: ∀s acc accv p is fs fd. LIST_TYPE CHAR acc accv ⇒ - app (p:'ffi ffi_proj) read_string_aux_v [is; accv] + app (p:'ffi ffi_proj) Sexp_read_string_aux_v [is; accv] (STDIO fs * INSTREAM_STR fd is s fs) (read_string_aux_post s acc fs is fd) Proof ho_match_mp_tac read_string_aux_ind \\ rpt strip_tac \\ qmatch_goalsub_abbrev_tac ‘read_string_aux_post s₁’ - \\ xcf "read_string_aux" st + \\ xcf_with_def $ definition "Sexp_read_string_aux_v_def" (* [] *) >- (xlet ‘POSTv chv. SEP_EXISTS k. @@ -215,7 +226,7 @@ Proof QED Theorem read_string_spec: - app (p:'ffi ffi_proj) read_string_v [is] + app (p:'ffi ffi_proj) Sexp_read_string_v [is] (STDIO fs * INSTREAM_STR fd is s fs) (case read_string s of | INL (msg, rest) => @@ -228,7 +239,7 @@ Theorem read_string_spec: INSTREAM_STR fd is rest (forwardFD fs fd k) * &(STRING_TYPE slit slitv)) Proof - xcf "read_string" st + xcf_with_def $ definition "Sexp_read_string_v_def" \\ xlet_autop \\ xapp \\ simp [read_string_aux_post_def, read_string_def] \\ qexistsl [‘emp’, ‘s’, ‘fs’, ‘fd’, ‘[]’] @@ -238,7 +249,7 @@ QED Theorem read_symbol_spec[local]: ∀s acc accv p is fs fd. LIST_TYPE CHAR acc accv ⇒ - app (p:'ffi ffi_proj) read_symbol_v [is; accv] + app (p:'ffi ffi_proj) Sexp_read_symbol_v [is; accv] (STDIO fs * INSTREAM_STR fd is s fs) (case read_symbol s acc of | (slit, rest) => @@ -250,7 +261,7 @@ Proof Induct \\ rpt strip_tac \\ qmatch_goalsub_abbrev_tac ‘read_symbol s₁’ - \\ xcf "read_symbol" st + \\ xcf_with_def $ definition "Sexp_read_symbol_v_def" >- (* [] *) (xlet ‘POSTv chv. SEP_EXISTS k. STDIO (forwardFD fs fd k) * @@ -331,14 +342,14 @@ fun lex_aux_base_tac k = Theorem lex_aux_spec: ∀ depth s acc depthv accv p is fs fd. NUM depth depthv ∧ LIST_TYPE MLSEXP_TOKEN_TYPE acc accv ⇒ - app (p:'ffi ffi_proj) lex_aux_v [depthv; is; accv] + app (p:'ffi ffi_proj) Sexp_lex_aux_v [depthv; is; accv] (STDIO fs * INSTREAM_STR fd is s fs) (lex_aux_post depth s acc fs is fd) Proof ho_match_mp_tac lex_aux_ind \\ rpt strip_tac \\ qmatch_goalsub_abbrev_tac ‘lex_aux_post _ s₁’ - \\ xcf "lex_aux" st + \\ xcf_with_def $ definition "Sexp_lex_aux_v_def" (* [] *) >- (xlet ‘POSTv chv. SEP_EXISTS k. @@ -449,7 +460,7 @@ Proof QED Theorem lex_spec: - app (p:'ffi ffi_proj) lex_v [is] + app (p:'ffi ffi_proj) Sexp_lex_v [is] (STDIO fs * INSTREAM_STR fd is s fs) (case lex s of | INL (msg, rest) => @@ -462,7 +473,7 @@ Theorem lex_spec: INSTREAM_STR fd is rest (forwardFD fs fd k) * &(LIST_TYPE MLSEXP_TOKEN_TYPE toks tokvs)) Proof - xcf "lex" st + xcf_with_def $ definition "Sexp_lex_v_def" \\ xlet_autop \\ xapp \\ simp [lex_aux_post_def, lex_def] \\ qexistsl [‘emp’, ‘s’, ‘fs’, ‘fd’, ‘[]’] @@ -470,7 +481,7 @@ Proof QED Theorem parse_spec: - app (p:'ffi ffi_proj) parse_v [is] + app (p:'ffi ffi_proj) Sexp_parse_v [is] (STDIO fs * INSTREAM_STR fd is s fs) (case parse s of | INL (msg, rest) => @@ -483,7 +494,7 @@ Theorem parse_spec: INSTREAM_STR fd is rest (forwardFD fs fd k) * &(MLSEXP_SEXP_TYPE se sev)) Proof - xcf "parse" st + xcf_with_def $ definition "Sexp_parse_v_def" \\ ntac 2 xlet_autop \\ simp [parse_def] \\ namedCases_on ‘lex s’ ["l", "r"] diff --git a/basis/basisProgScript.sml b/basis/basisProgScript.sml index adda22c257..4d4039c7c9 100644 --- a/basis/basisProgScript.sml +++ b/basis/basisProgScript.sml @@ -4,11 +4,11 @@ Theory basisProg Ancestors std_prelude CommandLineProof TextIOProof RuntimeProof - PrettyPrinterProg + PrettyPrinterProg SexpProg Libs preamble ml_translatorLib ml_progLib cfLib basisFunctionsLib -val _ = translation_extends"TextIOProg"; +val _ = translation_extends"SexpProg"; val print_e = ``Var(Long"TextIO"(Short"print"))`` val eval_thm = let @@ -135,4 +135,3 @@ End Theorem basis_Decls_thm = ml_progLib.get_Decls_thm basis_st |> REWRITE_RULE [GSYM basis_def]; - diff --git a/basis/dependency-order b/basis/dependency-order index bc5f26e7cd..f85af63306 100644 --- a/basis/dependency-order +++ b/basis/dependency-order @@ -28,4 +28,5 @@ Hashtable CommandLine Double TextIO +Sexp basis diff --git a/compiler/dafny/README.md b/compiler/dafny/README.md index d771cbfdc3..5890499758 100644 --- a/compiler/dafny/README.md +++ b/compiler/dafny/README.md @@ -12,9 +12,6 @@ Definition of the Dafny to CakeML compiler. [dafny_freshenScript.sml](dafny_freshenScript.sml): Implements the freshen pass, where names are updated to be unique. -[dafny_sexpScript.sml](dafny_sexpScript.sml): -Definitions to lex and parse S-expressions. - [dafny_to_cakemlScript.sml](dafny_to_cakemlScript.sml): Defines the translation of Dafny's to CakeML's AST. diff --git a/compiler/dafny/dafny_compilerScript.sml b/compiler/dafny/dafny_compilerScript.sml index 27375309af..d4ebd5c108 100644 --- a/compiler/dafny/dafny_compilerScript.sml +++ b/compiler/dafny/dafny_compilerScript.sml @@ -3,32 +3,19 @@ *) Theory dafny_compiler Ancestors - result_monad dafny_sexp sexp_to_dafny dafny_to_cakeml + result_monad sexp_to_dafny dafny_to_cakeml dafny_freshen fromSexp simpleSexpParse Libs preamble -(* Trusted frontend *) -Definition frontend_def: - frontend (dfy_sexp: string) = - do - dfy_sexp <- lex dfy_sexp; - dfy_sexp <- parse dfy_sexp; - to_program dfy_sexp - od -End - Definition compile_def: compile dfy = from_program (freshen_program dfy) End Definition dfy_to_cml_def: - dfy_to_cml (dfy_sexp: string) = - do - dfy <- frontend dfy_sexp; - compile dfy - od + dfy_to_cml dfy_sexp = + do dfy <- to_program dfy_sexp; compile dfy od End (* If compilation failed, outputs a program that prints the error message in @@ -53,12 +40,10 @@ Definition cmlm_to_str_def: End Definition main_function_def: - main_function (input: mlstring): mlstring = + main_function (sexp: mlsexp$sexp): mlstring = let - input = explode input; - cmlm = dfy_to_cml input; + cmlm = dfy_to_cml sexp; cml_str = cmlm_to_str cmlm; in implode cml_str End - diff --git a/compiler/dafny/dafny_sexpScript.sml b/compiler/dafny/dafny_sexpScript.sml deleted file mode 100644 index d624b4735e..0000000000 --- a/compiler/dafny/dafny_sexpScript.sml +++ /dev/null @@ -1,152 +0,0 @@ -(* - Definitions to lex and parse S-expressions. -*) -Theory dafny_sexp -Ancestors - mlstring result_monad -Libs - preamble - - -(* Datatypes *) - -Datatype: - token = OPEN | CLOSE | STRTOK mlstring -End - -Datatype: - sexp = Atom mlstring | Expr (sexp list) -End - -(* Definition for lexing *) - -Definition read_quoted_aux_def: - read_quoted_aux [] acc = - fail «read_quoted_aux: Missing closing quotes» ∧ - read_quoted_aux (#"\""::rest) acc = - return ((REVERSE acc), rest) ∧ - read_quoted_aux (#"\\"::#"\""::rest) acc = - read_quoted_aux rest (#"\""::acc) ∧ - read_quoted_aux (#"\\"::#"\\"::rest) acc = - read_quoted_aux rest (#"\\"::acc) ∧ - read_quoted_aux (c::cs) acc = - read_quoted_aux cs (c::acc) -End - -Definition read_quoted_def: - read_quoted (cs: string) = - read_quoted_aux cs [] -End - -Theorem read_quoted_aux_length: - ∀cs acc x y. read_quoted_aux cs acc = INR (x, y) ⇒ LENGTH y ≤ LENGTH cs -Proof - ho_match_mp_tac read_quoted_aux_ind \\ rw[] - \\ pop_assum mp_tac - \\ once_rewrite_tac[read_quoted_aux_def] - \\ EVERY_CASE_TAC - \\ rpt strip_tac \\ res_tac \\ gvs[] -QED - -Definition read_atom_aux_def: - read_atom_aux [] acc = - ((REVERSE acc), []) ∧ - read_atom_aux (c::cs) acc = - if MEM c ") \t\n" then ((REVERSE acc), (c::cs)) - else read_atom_aux cs (c::acc) -End - -Definition read_atom_def: - read_atom cs = - read_atom_aux cs [] -End - -Theorem read_atom_aux_length: - ∀cs x y acc. read_atom_aux cs acc = (x, y) ⇒ LENGTH y ≤ LENGTH cs -Proof - Induct - \\ simp[read_atom_aux_def] - \\ rw[read_atom_aux_def] \\ res_tac \\ gvs[] -QED - -Theorem read_quoted_length: - ∀cs x y. read_quoted cs = INR (x, y) ⇒ LENGTH y ≤ LENGTH cs -Proof - rw[read_quoted_def] \\ imp_res_tac read_quoted_aux_length -QED - -(* Adapted from - * https://github.com/dafny-lang/dafny/blob/bc6b587e264e3c531c4d6698abd421abdff3aea9/Source/DafnyCore/Generic/Util.cs#L341 - *) -Definition unescape_string_def: - unescape_string (c1::c2::rest) = - (if c1 = #"\\" ∧ c2 = #"'" then - #"'"::(unescape_string rest) - else if c1 = #"\\" ∧ c2 = #"\"" then - #"\""::(unescape_string rest) - else if c1 = #"\\" ∧ c2 = #"\\" then - #"\\"::(unescape_string rest) - else if c1 = #"\\" ∧ c2 = #"0" then - #"\000"::(unescape_string rest) - else if c1 = #"\\" ∧ c2 = #"n" then - #"\n"::(unescape_string rest) - else if c1 = #"\\" ∧ c2 = #"r" then - #"\r"::(unescape_string rest) - else if c1 = #"\\" ∧ c2 = #"t" then - #"\t"::(unescape_string rest) - else - c1::(unescape_string (c2::rest))) ∧ - unescape_string (c::rest) = (c::(unescape_string rest)) ∧ - unescape_string "" = "" -End - -(* Removed use of monads for termination proof *) -Definition lex_aux_def: - lex_aux [] acc = - (INR acc) ∧ - lex_aux (c::cs) acc = - if MEM c " \t\n" then lex_aux cs acc - else if c = #"(" then lex_aux cs (OPEN::acc) - else if c = #")" then lex_aux cs (CLOSE::acc) - else if c = #"\"" then - case read_quoted cs of - | INL msg => INL msg - | INR (s, rest) => - lex_aux rest ((STRTOK (implode (unescape_string s)))::acc) - else - case read_atom (c::cs) of - | (s, rest) => - lex_aux rest ((STRTOK (implode (unescape_string s)))::acc) -Termination - WF_REL_TAC ‘measure $ LENGTH o FST’ \\ rw[] - \\ imp_res_tac read_quoted_length \\ fs[] - \\ pop_assum mp_tac - \\ simp[read_atom_def] - \\ simp[Once read_atom_aux_def] - \\ strip_tac - \\ imp_res_tac read_atom_aux_length \\ fs[] -End - -Definition lex_def: - lex cs = lex_aux cs [] -End - -(* Definitions for parsing *) - -Definition parse_aux_def: - parse_aux [] xs s = xs ∧ - parse_aux (CLOSE :: rest) xs s = parse_aux rest [] (xs::s) ∧ - parse_aux (OPEN :: rest) xs s = - (case s of - | [] => parse_aux rest xs s - | (y::ys) => parse_aux rest ((Expr xs)::y) ys) ∧ - parse_aux ((STRTOK st) :: rest) xs s = parse_aux rest ((Atom st)::xs) s -End - -Definition parse_def: - parse toks = - case parse_aux toks [] [] of - | [e] => return e - | _ => fail «parse: Not exactly one S-expression in input» -End - diff --git a/compiler/dafny/dafny_to_cakemlScript.sml b/compiler/dafny/dafny_to_cakemlScript.sml index 524d1b733c..c48954b332 100644 --- a/compiler/dafny/dafny_to_cakemlScript.sml +++ b/compiler/dafny/dafny_to_cakemlScript.sml @@ -522,7 +522,6 @@ End (* Testing *) -(* open dafny_sexpTheory *) (* open sexp_to_dafnyTheory *) (* open TextIO *) diff --git a/compiler/dafny/sexp_to_dafnyScript.sml b/compiler/dafny/sexp_to_dafnyScript.sml index 8b067aa133..69a16caac0 100644 --- a/compiler/dafny/sexp_to_dafnyScript.sml +++ b/compiler/dafny/sexp_to_dafnyScript.sml @@ -3,7 +3,7 @@ *) Theory sexp_to_dafny Ancestors - result_monad dafny_sexp dafny_ast mlint + result_monad dafny_ast mlint mlsexp Libs preamble diff --git a/compiler/dafny/translation/README.md b/compiler/dafny/translation/README.md index 1ac2b2fedb..840ed8bef2 100644 --- a/compiler/dafny/translation/README.md +++ b/compiler/dafny/translation/README.md @@ -12,9 +12,6 @@ Translates the Dafny to CakeML compiler. [dafny_freshenProgScript.sml](dafny_freshenProgScript.sml): Translates definitions for the freshen pass. -[dafny_sexpProgScript.sml](dafny_sexpProgScript.sml): -Translates definitions to lex and parse S-expressions generated by Dafny. - [dafny_to_cakemlProgScript.sml](dafny_to_cakemlProgScript.sml): Translates definitions to translate from Dafny's to CakeML's AST. diff --git a/compiler/dafny/translation/dafny_compilerProgScript.sml b/compiler/dafny/translation/dafny_compilerProgScript.sml index f9519f61b8..6271b7662f 100644 --- a/compiler/dafny/translation/dafny_compilerProgScript.sml +++ b/compiler/dafny/translation/dafny_compilerProgScript.sml @@ -241,7 +241,6 @@ val r = translate fromSexpTheory.decsexp_def; (* Translating dafny_compilerTheory *) -val r = translate dafny_compilerTheory.frontend_def; val r = translate dafny_compilerTheory.compile_def; val r = translate dafny_compilerTheory.dfy_to_cml_def; val r = translate dafny_compilerTheory.unpack_def; @@ -253,7 +252,7 @@ val r = translate dafny_compilerTheory.main_function_def; (* Sanity checks + Finalizing *) -val _ = type_of “main_function” = “:mlstring -> mlstring” +val _ = type_of “main_function” = “:mlsexp$sexp -> mlstring” orelse failwith "The main_function has the wrong type."; val _ = r |> hyp |> null orelse @@ -261,7 +260,7 @@ val _ = r |> hyp |> null orelse \dafny_compilerTheory.main_function_def"); val main = process_topdecs - ‘print (main_function (TextIO.inputAll TextIO.stdIn));’; + ‘print (main_function (Sexp.parse (TextIO.b_openStdIn ())));’; val prog = get_ml_prog_state () diff --git a/compiler/dafny/translation/dafny_sexpProgScript.sml b/compiler/dafny/translation/dafny_sexpProgScript.sml deleted file mode 100644 index b766f4396e..0000000000 --- a/compiler/dafny/translation/dafny_sexpProgScript.sml +++ /dev/null @@ -1,30 +0,0 @@ -(* - Translates definitions to lex and parse S-expressions generated by Dafny. -*) -Theory dafny_sexpProg -Ancestors - result_monadProg dafny_sexp -Libs - preamble ml_translatorLib - - -val _ = translation_extends "result_monadProg"; - -val _ = register_type “:dafny_sexp$token”; -val _ = register_type “:dafny_sexp$sexp”; - -val r = translate dafny_sexpTheory.read_quoted_aux_def; -val r = translate dafny_sexpTheory.read_quoted_def; - -val r = translate boolTheory.IN_DEF; -val r = translate listTheory.LIST_TO_SET_DEF; -val r = translate dafny_sexpTheory.read_atom_aux_def; - -val r = translate dafny_sexpTheory.read_atom_def; -val r = translate dafny_sexpTheory.unescape_string_def; -val r = translate dafny_sexpTheory.lex_aux_def; -val r = translate dafny_sexpTheory.lex_def; - -val r = translate dafny_sexpTheory.parse_aux_def; -val r = translate dafny_sexpTheory.parse_def; - diff --git a/compiler/dafny/translation/sexp_to_dafnyProgScript.sml b/compiler/dafny/translation/sexp_to_dafnyProgScript.sml index b89a3d1272..7882fa9e2f 100644 --- a/compiler/dafny/translation/sexp_to_dafnyProgScript.sml +++ b/compiler/dafny/translation/sexp_to_dafnyProgScript.sml @@ -3,12 +3,13 @@ *) Theory sexp_to_dafnyProg Ancestors - dafny_sexpProg sexp_to_dafny + sexp_to_dafny + result_monadProg Libs preamble ml_translatorLib -val _ = translation_extends "dafny_sexpProg"; +val _ = translation_extends "result_monadProg"; val r = translate sexp_to_dafnyTheory.to_mlstring_def; val r = translate sexp_to_dafnyTheory.to_bool_def; From 1fdca7f4437ce6befa632c6fe068ef39e6a17222 Mon Sep 17 00:00:00 2001 From: Magnus Myreen Date: Tue, 9 Dec 2025 23:34:32 +0100 Subject: [PATCH 25/42] Sketch sexp pretty printing --- basis/README.md | 7 +- basis/pure/mlsexpScript.sml | 162 +++++++++++++++++++++++++++++++++++- 2 files changed, 165 insertions(+), 4 deletions(-) diff --git a/basis/README.md b/basis/README.md index 59849d2bb6..b573892585 100644 --- a/basis/README.md +++ b/basis/README.md @@ -76,6 +76,9 @@ Proof about the exit function in the Runtime module. This module contains CakeML code implementing a functional set type using a self-balancing binary tree. +[SexpProgScript.sml](SexpProgScript.sml): +Module for parsing and pretty-printing s-expressions. + [StringProgScript.sml](StringProgScript.sml): Module about the built-in string tyoe. @@ -144,9 +147,7 @@ Lemmas about the file system model used by the proof about TextIO. Logical model of filesystem and I/O streams [mlbasicsProgScript.sml](mlbasicsProgScript.sml): -Bind various built-in functions to function names that the parser -expects, e.g. the parser generates a call to a function called "+" -when it parses 1+2. +Translates a variety of basic constructs. [pure](pure): HOL definitions of the pure functions used in the CakeML basis. diff --git a/basis/pure/mlsexpScript.sml b/basis/pure/mlsexpScript.sml index d3111b5782..e50f27f9c9 100644 --- a/basis/pure/mlsexpScript.sml +++ b/basis/pure/mlsexpScript.sml @@ -185,7 +185,167 @@ Proof QED (*--------------------------------------------------------------* - Pretty printing + Pretty printing of str_tree + *--------------------------------------------------------------*) + +Datatype: + str_tree = Str mlstring + | Trees (str_tree list) + | GrabLine str_tree +End + +Datatype: + pretty = Parenthesis pretty + | String mlstring + | Append pretty bool pretty + | Size num pretty +End + +Definition newlines_def: + newlines [] = String (strlit "") ∧ + newlines [x] = x ∧ + newlines (x::xs) = Append x T (newlines xs) +End + +Definition v2pretty_def: + (v2pretty v = + case v of + | Str s => String s + | GrabLine w => Size 100000 (v2pretty w) + | Trees l => Parenthesis (newlines (vs2pretty l))) ∧ + (vs2pretty vs = + case vs of + | [] => [] + | (v::vs) => v2pretty v :: vs2pretty vs) +End + +Definition get_size_def: + get_size (Size n x) = n ∧ + get_size (Append x _ y) = get_size x + get_size y + 1 ∧ + get_size (Parenthesis x) = get_size x + 2 ∧ + get_size _ = 0 +End + +Definition get_next_size_def: + get_next_size (Size n x) = n ∧ + get_next_size (Append x _ y) = get_next_size x ∧ + get_next_size (Parenthesis x) = get_next_size x + 2 ∧ + get_next_size _ = 0 +End + +Definition annotate_def: + annotate (String s) = Size (strlen s) (String s) ∧ + annotate (Parenthesis b) = + (let b = annotate b in + Size (get_size b + 2) (Parenthesis b)) ∧ + annotate (Append b1 n b2) = + (let b1 = annotate b1 in + let b2 = annotate b2 in + (* Size (get_size b1 + get_size b2 + 1) *) (Append b1 n b2)) ∧ + annotate (Size n b) = Size n (annotate b) +End + +Definition remove_all_def: + remove_all (Parenthesis v) = Parenthesis (remove_all v) ∧ + remove_all (String v1) = String v1 ∧ + remove_all (Append v2 _ v3) = Append (remove_all v2) F (remove_all v3) ∧ + remove_all (Size v4 v5) = remove_all v5 +End + +Definition smart_remove_def: + smart_remove i k (Size n b) = + (if k + n < 70 then remove_all b else smart_remove i k b) ∧ + smart_remove i k (Parenthesis v) = Parenthesis (smart_remove (i+1) (k+1) v) ∧ + smart_remove i k (String v1) = String v1 ∧ + smart_remove i k (Append v2 b v3) = + let n2 = get_size v2 in + let n3 = get_next_size v3 in + if k + n2 + n3 < 50 then + Append (smart_remove i k v2) F (smart_remove i (k+n2) v3) + else + Append (smart_remove i k v2) T (smart_remove i i v3) +End + +Definition flatten_def: + flatten indent (Size n p) s = flatten indent p s ∧ + flatten indent (Parenthesis p) s = + strlit "(" :: flatten (concat [indent; strlit " "]) p (strlit ")" :: s) ∧ + flatten indent (String t) s = t :: s ∧ + flatten indent (Append p1 b p2) s = + flatten indent p1 ((if b then indent else strlit " ") :: flatten indent p2 s) +End + +Definition v2strs_def: + v2strs end v = flatten (strlit "\n") (smart_remove 0 0 (annotate (v2pretty v))) [end] +End + +Theorem test1_v2strs[local]: + concat (v2strs (strlit "") + (Trees [Str (strlit "hello"); + Str (strlit "there")])) = + strlit "(hello there)" +Proof + EVAL_TAC +QED + +Theorem test2_v2strs[local]: + concat (v2strs (strlit "") + (Trees [Str (strlit "test"); + GrabLine (Str (strlit "hi")); + GrabLine (Str (strlit "there"))])) = + strlit "(test\n hi\n there)" +Proof + EVAL_TAC +QED + +(*--------------------------------------------------------------* + Pretty printing of sexp + *--------------------------------------------------------------*) + +Definition is_safe_char_def: + is_safe_char c ⇔ ~MEM c " \t\n()\"\000" +End + +Definition str_every_def: + str_every p n s = + if n = 0 then T else + p (strsub s (n-1)) ∧ str_every p (n-1:num) s +End + +Definition make_str_safe_def: + make_str_safe s = + if str_every is_safe_char (strlen s) s then s else escape_str s +End + +Definition sexp2tree_def: + sexp2tree (Atom s) = Str (make_str_safe s) ∧ + sexp2tree (Expr l) = Trees (sexp2trees l) ∧ + sexp2trees [] = [] ∧ + sexp2trees (v::vs) = sexp2tree v :: sexp2trees vs +End + +Definition sexp_to_app_list_def: + sexp_to_app_list (Atom s) = List [make_str_safe s] ∧ + sexp_to_app_list (Expr l) = + Append (List [strlit "("]) + (Append (sexps_to_app_list l) (List [strlit ")"])) ∧ + sexps_to_app_list [] = List [] ∧ + sexps_to_app_list (v::vs) = + if NULL vs then sexp_to_app_list v + else Append (sexp_to_app_list v) + (Append (List [strlit " "]) (sexps_to_app_list vs)) +End + +Definition sexp_to_string_def: + sexp_to_string s = concat (append (Append (sexp_to_app_list s) (List [«\n»]))) +End + +Definition sexp_to_pretty_string_def: + sexp_to_pretty_string s = concat (v2strs (strlit "\n") (sexp2tree s)) +End + +(*--------------------------------------------------------------* + Proofs relating parsing with pretty prniting *--------------------------------------------------------------*) Definition to_tokens_def: From f57bcf9499140e54f8b48b0d9d18e24395718a85 Mon Sep 17 00:00:00 2001 From: Daniel Nezamabadi <55559979+dnezam@users.noreply.github.com> Date: Tue, 9 Dec 2025 23:37:09 +0100 Subject: [PATCH 26/42] Add REPL pretty-printing for s-expressions --- basis/SexpProgScript.sml | 14 +++++++++++--- basis/types.txt | 2 ++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/basis/SexpProgScript.sml b/basis/SexpProgScript.sml index f9c93ba61f..d5e38039ab 100644 --- a/basis/SexpProgScript.sml +++ b/basis/SexpProgScript.sml @@ -25,7 +25,15 @@ val _ = translation_extends "TextIOProg"; val _ = ml_prog_update (open_module "Sexp"); -val _ = register_type “:mlsexp$sexp”; +(* Temporarily disable full type names to avoid mlsexp_sexp as the exported type name *) +val _ = with_flag (use_full_type_names, false) register_type “:mlsexp$sexp”; + +(* Pretty printing function for s-expressions used by the REPL *) +Quote add_cakeml: + fun pp_sexp se = case se of + Atom s => PrettyPrinter.app_block "Atom" [PrettyPrinter.token s] + | Expr ses => PrettyPrinter.app_block "Expr" [PrettyPrinter.pp_list pp_sexp ses] +End val _ = ml_prog_update open_local_block; @@ -492,7 +500,7 @@ Theorem parse_spec: POSTv sev. SEP_EXISTS k. STDIO (forwardFD fs fd k) * INSTREAM_STR fd is rest (forwardFD fs fd k) * - &(MLSEXP_SEXP_TYPE se sev)) + &(SEXP_TYPE se sev)) Proof xcf_with_def $ definition "Sexp_parse_v_def" \\ ntac 2 xlet_autop @@ -514,7 +522,7 @@ Proof \\ xlet ‘POSTv ses. STDIO (forwardFD fs fd k) * INSTREAM_STR fd is rest (forwardFD fs fd k) * - &(LIST_TYPE MLSEXP_SEXP_TYPE (parse_aux toks [] []) ses)’ + &(LIST_TYPE SEXP_TYPE (parse_aux toks [] []) ses)’ >- (xapp \\ xsimpl \\ qexistsl [‘[]’, ‘[]’, ‘toks’] \\ simp [LIST_TYPE_def]) \\ namedCases_on ‘parse_aux toks [] []’ ["", "se ses"] \\ gvs [LIST_TYPE_def] diff --git a/basis/types.txt b/basis/types.txt index 27721db3af..87cfea1f78 100644 --- a/basis/types.txt +++ b/basis/types.txt @@ -388,6 +388,8 @@ TextIO.b_inputAllTokensStdIn: char -> (char -> bool) -> (string -> 'a) -> 'a lis TextIO.foldChars: (char -> 'a -> 'a) -> 'a -> string option -> 'a option TextIO.foldLines: char -> (string -> 'a -> 'a) -> 'a -> string option -> 'a option TextIO.foldTokens: char -> (char -> bool) -> (string -> 'a) -> ('a list -> 'b -> 'b) -> 'b -> string option -> 'b option +Sexp.pp_sexp: Sexp.sexp -> PrettyPrinter.pp_data +Sexp.parse: TextIO.b_instream -> Sexp.sexp fst: 'a * 'b -> 'a snd: 'a * 'b -> 'b curry: ('a * 'b -> 'c) -> 'a -> 'b -> 'c From bc07c33c7332660c6258e59f2c7b4db8960f9de6 Mon Sep 17 00:00:00 2001 From: Daniel Nezamabadi <55559979+dnezam@users.noreply.github.com> Date: Thu, 11 Dec 2025 16:13:56 +0100 Subject: [PATCH 27/42] [mlsexp] Add fromString + Rename parse to b_inputSexp --- basis/SexpProgScript.sml | 189 +++++++++++++++++++++--------------- basis/pure/mlsexpScript.sml | 8 ++ basis/types.txt | 3 +- 3 files changed, 120 insertions(+), 80 deletions(-) diff --git a/basis/SexpProgScript.sml b/basis/SexpProgScript.sml index d5e38039ab..2b966a6792 100644 --- a/basis/SexpProgScript.sml +++ b/basis/SexpProgScript.sml @@ -35,36 +35,62 @@ Quote add_cakeml: | Expr ses => PrettyPrinter.app_block "Expr" [PrettyPrinter.pp_list pp_sexp ses] End +(* We will define two functions to generate s-expressions: + 1. fromString, which is + - simple, + - fails with None (in the style of the SML basis) + - translated from mlsexp. + 2. b_inputSexp, which is + - efficient (buffered input), + - fails with exceptions, + - written in CakeML, proved equivalent to definitions in mlsexp using + characteristic formulae (cf). + + OPT If needed, fromString can be made more efficient by using mlstring + instead of a char list. + *) + val _ = ml_prog_update open_local_block; +(* Shared, but private, helpers and types *) val _ = register_type “:mlsexp$token”; +val r = translate mlsexpTheory.parse_aux_def; + +(* Private helpers for fromString *) +val r = translate mlsexpTheory.read_string_aux_def; +val r = translate mlsexpTheory.read_string_def; +val r = translate $ SRULE [isSpace_def] mlsexpTheory.read_symbol_def; +val r = translate $ SRULE [isSpace_def] mlsexpTheory.lex_aux_def; +val r = translate mlsexpTheory.lex_def; +val r = translate mlsexpTheory.parse_def; +(* Private helpers for b_inputSexp *) Quote add_cakeml: - fun read_string_aux input acc = + fun read_string_aux_imp input acc = case TextIO.b_input1 input of None => raise Fail "read_string_aux: unterminated string literal" | Some c => if c = #"\"" then String.implode (List.rev acc) else if c = #"\\" then (case TextIO.b_input1 input of - None => read_string_aux input acc (* causes error: unterminated string *) + None => read_string_aux_imp input acc (* causes error: unterminated string *) | Some e => - (if e = #"\\" then read_string_aux input (#"\""::acc) else - if e = #"0" then read_string_aux input (#"\000"::acc) else - if e = #"n" then read_string_aux input (#"\n"::acc) else - if e = #"r" then read_string_aux input (#"\""::acc) else - if e = #"t" then read_string_aux input (#"\""::acc) else + (if e = #"\\" then read_string_aux_imp input (#"\""::acc) else + if e = #"0" then read_string_aux_imp input (#"\000"::acc) else + if e = #"n" then read_string_aux_imp input (#"\n"::acc) else + if e = #"r" then read_string_aux_imp input (#"\""::acc) else + if e = #"t" then read_string_aux_imp input (#"\""::acc) else raise Fail "read_string_aux: unrecognised escape")) else - read_string_aux input (c::acc) + read_string_aux_imp input (c::acc) End Quote add_cakeml: - fun read_string input = read_string_aux input [] + fun read_string_imp input = read_string_aux_imp input [] End Quote add_cakeml: - fun read_symbol input acc = + fun read_symbol_imp input acc = case TextIO.b_peekChar input of None => String.implode (List.rev acc) | Some c => @@ -72,42 +98,47 @@ Quote add_cakeml: then String.implode (List.rev acc) else ( TextIO.b_input1 input; (* Consume c *) - read_symbol input (c::acc)) + read_symbol_imp input (c::acc)) End Quote add_cakeml: - fun lex_aux depth input acc = + fun lex_aux_imp depth input acc = case TextIO.b_input1 input of None => if depth = 0 then acc else raise Fail "lex_aux: missing closing parenthesis" | Some c => - if Char.isSpace c then lex_aux depth input acc - else if c = #"(" then lex_aux (depth + 1) input (Open::acc) + if Char.isSpace c then lex_aux_imp depth input acc + else if c = #"(" then lex_aux_imp (depth + 1) input (Open::acc) else if c = #")" then (if depth = 0 then raise Fail "lex_aux: too many closing parenthesis" else if depth = 1 then Close::acc - else lex_aux (depth - 1) input (Close::acc)) + else lex_aux_imp (depth - 1) input (Close::acc)) else if c = #"\"" then - let val s = read_string input in + let val s = read_string_imp input in if depth = 0 then [Symbol s] - else lex_aux depth input ((Symbol s)::acc) end + else lex_aux_imp depth input ((Symbol s)::acc) end else - let val s = read_symbol input [c] in + let val s = read_symbol_imp input [c] in if depth = 0 then [Symbol s] - else lex_aux depth input ((Symbol s)::acc) end + else lex_aux_imp depth input ((Symbol s)::acc) end End Quote add_cakeml: - fun lex input = lex_aux 0 input [] + fun lex_imp input = lex_aux_imp 0 input [] End -val r = translate mlsexpTheory.parse_aux_def; - val _ = ml_prog_update open_local_in_block; +(* Exported functions *) +val _ = next_ml_names := ["fromString"]; +val r = translate mlsexpTheory.fromString_def; + +(* If we were 100% consistent, we should call this parse_imp. Since it isn't a + neat name, and parse is already taken by the translated pure version, we + use b_inputSexp. *) Quote add_cakeml: - fun parse input = - case parse_aux (lex input) [] [] of + fun b_inputSexp input = + case parse_aux (lex_imp input) [] [] of [] => raise Fail "parse: empty input" | (v::_) => v End @@ -121,8 +152,8 @@ val _ = ml_prog_update (close_module NONE); val st = get_ml_prog_state (); -Definition read_string_aux_post_def: - read_string_aux_post s acc fs is fd = +Definition read_string_aux_imp_post_def: + read_string_aux_imp_post s acc fs is fd = (case read_string_aux s acc of | INL (msg, rest) => POSTe exn. SEP_EXISTS k. @@ -135,10 +166,10 @@ Definition read_string_aux_post_def: &(STRING_TYPE slit slitv)) End -(* Can be used in read_string_aux_spec to finish proofs about base cases. +(* Can be used in read_string_aux_imp_spec to finish proofs about base cases. * k indicates how much we moved forward (passed to forwardFD) *) -fun read_string_aux_base_tac k = - (simp [read_string_aux_post_def, read_string_aux_def] \\ xsimpl +fun read_string_aux_imp_base_tac k = + (simp [read_string_aux_imp_post_def, read_string_aux_def] \\ xsimpl \\ qexists k \\ xsimpl \\ simp [Fail_exn_def]); (* Useful when finishing up recursive cases. Takes care of instantiating k in @@ -149,17 +180,17 @@ val STDIO_forwardFD_INSTREAM_STR_tac = \\ qmatch_goalsub_abbrev_tac ‘forwardFD fs fd kpx’ \\ qexists ‘kpx’ \\ xsimpl); -Theorem read_string_aux_spec[local]: +Theorem read_string_aux_imp_spec[local]: ∀s acc accv p is fs fd. LIST_TYPE CHAR acc accv ⇒ - app (p:'ffi ffi_proj) Sexp_read_string_aux_v [is; accv] + app (p:'ffi ffi_proj) Sexp_read_string_aux_imp_v [is; accv] (STDIO fs * INSTREAM_STR fd is s fs) - (read_string_aux_post s acc fs is fd) + (read_string_aux_imp_post s acc fs is fd) Proof ho_match_mp_tac read_string_aux_ind \\ rpt strip_tac - \\ qmatch_goalsub_abbrev_tac ‘read_string_aux_post s₁’ - \\ xcf_with_def $ definition "Sexp_read_string_aux_v_def" + \\ qmatch_goalsub_abbrev_tac ‘read_string_aux_imp_post s₁’ + \\ xcf_with_def $ definition "Sexp_read_string_aux_imp_v_def" (* [] *) >- (xlet ‘POSTv chv. SEP_EXISTS k. @@ -169,7 +200,7 @@ Proof >- (xapp_spec b_input1_spec_str) \\ unabbrev_all_tac \\ gvs [OPTION_TYPE_def] \\ xmatch \\ xlet_autop \\ xraise - \\ read_string_aux_base_tac ‘k’) + \\ read_string_aux_imp_base_tac ‘k’) (* c::rest *) >- (xlet ‘POSTv chv. SEP_EXISTS k. @@ -185,11 +216,11 @@ Proof \\ xapp \\ xsimpl \\ first_assum $ irule_at (Pos hd) \\ rpt strip_tac - \\ read_string_aux_base_tac ‘k’) + \\ read_string_aux_imp_base_tac ‘k’) \\ xlet_autop \\ xif (* c = \ *) >- - (rename [‘read_string_aux_post (STRING _ s)’] + (rename [‘read_string_aux_imp_post (STRING _ s)’] \\ xlet ‘POSTv chv. SEP_EXISTS k₁. STDIO (forwardFD fs fd (k + k₁)) * INSTREAM_STR fd is (TL s) (forwardFD fs fd (k + k₁)) * @@ -203,7 +234,7 @@ Proof (xmatch \\ xapp \\ first_assum $ irule_at (Pos hd) \\ qexistsl [‘forwardFD fs fd (k + k₁)’, ‘fd’, ‘emp’] - \\ simp [read_string_aux_post_def, read_string_aux_def] + \\ simp [read_string_aux_imp_post_def, read_string_aux_def] \\ STDIO_forwardFD_INSTREAM_STR_tac) \\ xmatch (* escape characters *) @@ -216,25 +247,25 @@ Proof \\ xapp \\ simp [LIST_TYPE_def] \\ qexistsl [‘emp’, ‘forwardFD fs fd (k + k₁)’, ‘fd’] - \\ simp [read_string_aux_post_def, read_string_aux_def] + \\ simp [read_string_aux_imp_post_def, read_string_aux_def] \\ ntac 2 CASE_TAC \\ STDIO_forwardFD_INSTREAM_STR_tac)) (* unrecognised escape *) \\ xlet_autop \\ xraise - \\ read_string_aux_base_tac ‘k + k₁’) + \\ read_string_aux_imp_base_tac ‘k + k₁’) (* simply push c and recurse *) \\ xlet_autop \\ gvs [] \\ xapp \\ simp [LIST_TYPE_def] \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’] - \\ simp [read_string_aux_post_def, read_string_aux_def] + \\ simp [read_string_aux_imp_post_def, read_string_aux_def] \\ ntac 2 CASE_TAC \\ STDIO_forwardFD_INSTREAM_STR_tac) QED -Theorem read_string_spec: - app (p:'ffi ffi_proj) Sexp_read_string_v [is] +Theorem read_string_imp_spec: + app (p:'ffi ffi_proj) Sexp_read_string_imp_v [is] (STDIO fs * INSTREAM_STR fd is s fs) (case read_string s of | INL (msg, rest) => @@ -247,17 +278,17 @@ Theorem read_string_spec: INSTREAM_STR fd is rest (forwardFD fs fd k) * &(STRING_TYPE slit slitv)) Proof - xcf_with_def $ definition "Sexp_read_string_v_def" + xcf_with_def $ definition "Sexp_read_string_imp_v_def" \\ xlet_autop \\ xapp - \\ simp [read_string_aux_post_def, read_string_def] + \\ simp [read_string_aux_imp_post_def, read_string_def] \\ qexistsl [‘emp’, ‘s’, ‘fs’, ‘fd’, ‘[]’] \\ simp [LIST_TYPE_def] \\ xsimpl QED -Theorem read_symbol_spec[local]: +Theorem read_symbol_imp_spec[local]: ∀s acc accv p is fs fd. LIST_TYPE CHAR acc accv ⇒ - app (p:'ffi ffi_proj) Sexp_read_symbol_v [is; accv] + app (p:'ffi ffi_proj) Sexp_read_symbol_imp_v [is; accv] (STDIO fs * INSTREAM_STR fd is s fs) (case read_symbol s acc of | (slit, rest) => @@ -269,7 +300,7 @@ Proof Induct \\ rpt strip_tac \\ qmatch_goalsub_abbrev_tac ‘read_symbol s₁’ - \\ xcf_with_def $ definition "Sexp_read_symbol_v_def" + \\ xcf_with_def $ definition "Sexp_read_symbol_imp_v_def" >- (* [] *) (xlet ‘POSTv chv. SEP_EXISTS k. STDIO (forwardFD fs fd k) * @@ -325,8 +356,8 @@ Proof \\ STDIO_forwardFD_INSTREAM_STR_tac) QED -Definition lex_aux_post_def: - lex_aux_post depth s acc fs is fd = +Definition lex_aux_imp_post_def: + lex_aux_imp_post depth s acc fs is fd = (case lex_aux depth s acc of | INL (msg, rest) => POSTe exn. SEP_EXISTS k. @@ -341,23 +372,23 @@ End val MLSEXP_TOKEN_TYPE_def = theorem "MLSEXP_TOKEN_TYPE_def"; -(* Analogous to read_string_aux_base_tac *) -fun lex_aux_base_tac k = - (simp [lex_aux_post_def, lex_aux_def] \\ xsimpl +(* Analogous to read_string_aux_imp_base_tac *) +fun lex_aux_imp_base_tac k = + (simp [lex_aux_imp_post_def, lex_aux_def] \\ xsimpl \\ qexists k \\ xsimpl \\ simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def, Fail_exn_def]); -Theorem lex_aux_spec: +Theorem lex_aux_imp_spec: ∀ depth s acc depthv accv p is fs fd. NUM depth depthv ∧ LIST_TYPE MLSEXP_TOKEN_TYPE acc accv ⇒ - app (p:'ffi ffi_proj) Sexp_lex_aux_v [depthv; is; accv] + app (p:'ffi ffi_proj) Sexp_lex_aux_imp_v [depthv; is; accv] (STDIO fs * INSTREAM_STR fd is s fs) - (lex_aux_post depth s acc fs is fd) + (lex_aux_imp_post depth s acc fs is fd) Proof ho_match_mp_tac lex_aux_ind \\ rpt strip_tac - \\ qmatch_goalsub_abbrev_tac ‘lex_aux_post _ s₁’ - \\ xcf_with_def $ definition "Sexp_lex_aux_v_def" + \\ qmatch_goalsub_abbrev_tac ‘lex_aux_imp_post _ s₁’ + \\ xcf_with_def $ definition "Sexp_lex_aux_imp_v_def" (* [] *) >- (xlet ‘POSTv chv. SEP_EXISTS k. @@ -368,9 +399,9 @@ Proof \\ unabbrev_all_tac \\ gvs [OPTION_TYPE_def] \\ xmatch \\ xlet_autop \\ xif - >- (xvar \\ lex_aux_base_tac ‘k’) + >- (xvar \\ lex_aux_imp_base_tac ‘k’) \\ xlet_autop - \\ xraise \\ lex_aux_base_tac ‘k’) + \\ xraise \\ lex_aux_imp_base_tac ‘k’) (* c::cs *) \\ xlet ‘POSTv chv. SEP_EXISTS k. STDIO (forwardFD fs fd k) * @@ -378,14 +409,14 @@ Proof &OPTION_TYPE CHAR (oHD s₁) chv’ >- (xapp_spec b_input1_spec_str) \\ unabbrev_all_tac \\ gvs [OPTION_TYPE_def] - \\ rename [‘lex_aux_post _ (STRING c s)’] + \\ rename [‘lex_aux_imp_post _ (STRING c s)’] \\ xmatch \\ xlet_autop \\ xif >- (last_x_assum $ drule_all_then assume_tac \\ xapp \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’] - \\ simp [lex_aux_post_def, lex_aux_def] + \\ simp [lex_aux_imp_post_def, lex_aux_def] \\ ntac 2 CASE_TAC \\ STDIO_forwardFD_INSTREAM_STR_tac) \\ xlet_autop @@ -393,26 +424,26 @@ Proof (ntac 3 xlet_autop \\ xapp \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’] - \\ gvs [lex_aux_post_def, lex_aux_def] + \\ gvs [lex_aux_imp_post_def, lex_aux_def] \\ conj_tac >- (simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def]) \\ ntac 2 CASE_TAC \\ STDIO_forwardFD_INSTREAM_STR_tac) \\ xlet_autop \\ xif >- (xlet_autop - \\ xif >- (xlet_autop \\ xraise \\ lex_aux_base_tac ‘k’) + \\ xif >- (xlet_autop \\ xraise \\ lex_aux_imp_base_tac ‘k’) \\ xlet_autop - \\ xif >- (xlet_autop \\ xcon \\ lex_aux_base_tac ‘k’) + \\ xif >- (xlet_autop \\ xcon \\ lex_aux_imp_base_tac ‘k’) \\ ntac 3 xlet_autop \\ xapp \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’] - \\ gvs [lex_aux_post_def, lex_aux_def] + \\ gvs [lex_aux_imp_post_def, lex_aux_def] \\ conj_tac >- (simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def]) \\ ntac 2 CASE_TAC \\ STDIO_forwardFD_INSTREAM_STR_tac) \\ xlet_autop \\ xif >- - (simp [lex_aux_post_def, lex_aux_def, isSpace_def] + (simp [lex_aux_imp_post_def, lex_aux_def, isSpace_def] \\ namedCases_on ‘mlsexp$read_string s’ ["l", "r"] >- (namedCases_on ‘l’ ["msg rest"] @@ -440,13 +471,13 @@ Proof >- (ntac 2 xlet_autop \\ xcon \\ simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def] - \\ lex_aux_base_tac ‘k’) + \\ lex_aux_imp_base_tac ‘k’) \\ ntac 2 xlet_autop \\ xapp \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’] - \\ simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def, lex_aux_post_def] + \\ simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def, lex_aux_imp_post_def] \\ ntac 2 CASE_TAC \\ STDIO_forwardFD_INSTREAM_STR_tac) - \\ simp [lex_aux_post_def, lex_aux_def] + \\ simp [lex_aux_imp_post_def, lex_aux_def] \\ ntac 2 xlet_autop \\ namedCases_on ‘read_symbol s [c]’ ["sym rest"] \\ xlet ‘POSTv symv. SEP_EXISTS k. @@ -459,16 +490,16 @@ Proof \\ simp [LIST_TYPE_def] \\ STDIO_forwardFD_INSTREAM_STR_tac) \\ xlet_autop - \\ xif >- (ntac 2 xlet_autop \\ xcon \\ lex_aux_base_tac ‘k’) + \\ xif >- (ntac 2 xlet_autop \\ xcon \\ lex_aux_imp_base_tac ‘k’) \\ ntac 2 xlet_autop \\ xapp \\ qexistsl [‘emp’, ‘forwardFD fs fd k’, ‘fd’] - \\ simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def, lex_aux_post_def] + \\ simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def, lex_aux_imp_post_def] \\ ntac 2 CASE_TAC \\ STDIO_forwardFD_INSTREAM_STR_tac QED -Theorem lex_spec: - app (p:'ffi ffi_proj) Sexp_lex_v [is] +Theorem lex_imp_spec: + app (p:'ffi ffi_proj) Sexp_lex_imp_v [is] (STDIO fs * INSTREAM_STR fd is s fs) (case lex s of | INL (msg, rest) => @@ -481,15 +512,15 @@ Theorem lex_spec: INSTREAM_STR fd is rest (forwardFD fs fd k) * &(LIST_TYPE MLSEXP_TOKEN_TYPE toks tokvs)) Proof - xcf_with_def $ definition "Sexp_lex_v_def" + xcf_with_def $ definition "Sexp_lex_imp_v_def" \\ xlet_autop \\ xapp - \\ simp [lex_aux_post_def, lex_def] + \\ simp [lex_aux_imp_post_def, lex_def] \\ qexistsl [‘emp’, ‘s’, ‘fs’, ‘fd’, ‘[]’] \\ simp [LIST_TYPE_def, MLSEXP_TOKEN_TYPE_def] \\ xsimpl QED -Theorem parse_spec: - app (p:'ffi ffi_proj) Sexp_parse_v [is] +Theorem b_inputSexp_spec: + app (p:'ffi ffi_proj) Sexp_b_inputSexp_v [is] (STDIO fs * INSTREAM_STR fd is s fs) (case parse s of | INL (msg, rest) => @@ -502,7 +533,7 @@ Theorem parse_spec: INSTREAM_STR fd is rest (forwardFD fs fd k) * &(SEXP_TYPE se sev)) Proof - xcf_with_def $ definition "Sexp_parse_v_def" + xcf_with_def $ definition "Sexp_b_inputSexp_v_def" \\ ntac 2 xlet_autop \\ simp [parse_def] \\ namedCases_on ‘lex s’ ["l", "r"] diff --git a/basis/pure/mlsexpScript.sml b/basis/pure/mlsexpScript.sml index e50f27f9c9..836c510ee6 100644 --- a/basis/pure/mlsexpScript.sml +++ b/basis/pure/mlsexpScript.sml @@ -184,6 +184,14 @@ Proof EVAL_TAC QED +(* Function for parsing in the style of the Standard ML basis library. *) +Definition fromString_def: + fromString (input: mlstring) : sexp option = + case parse (explode input) of + | INL _ => NONE + | INR (se, _) => SOME se +End + (*--------------------------------------------------------------* Pretty printing of str_tree *--------------------------------------------------------------*) diff --git a/basis/types.txt b/basis/types.txt index 87cfea1f78..35a518d1d5 100644 --- a/basis/types.txt +++ b/basis/types.txt @@ -389,7 +389,8 @@ TextIO.foldChars: (char -> 'a -> 'a) -> 'a -> string option -> 'a option TextIO.foldLines: char -> (string -> 'a -> 'a) -> 'a -> string option -> 'a option TextIO.foldTokens: char -> (char -> bool) -> (string -> 'a) -> ('a list -> 'b -> 'b) -> 'b -> string option -> 'b option Sexp.pp_sexp: Sexp.sexp -> PrettyPrinter.pp_data -Sexp.parse: TextIO.b_instream -> Sexp.sexp +Sexp.fromString: string -> Sexp.sexp option +Sexp.b_inputSexp: TextIO.b_instream -> Sexp.sexp fst: 'a * 'b -> 'a snd: 'a * 'b -> 'b curry: ('a * 'b -> 'c) -> 'a -> 'b -> 'c From 00bee5e9ca47c8751f47bce8edbbb602bb5509a0 Mon Sep 17 00:00:00 2001 From: Magnus Myreen Date: Fri, 12 Dec 2025 16:19:48 +0100 Subject: [PATCH 28/42] Progress on parsing / pretty printing proof --- basis/pure/mlsexpScript.sml | 63 ++++++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/basis/pure/mlsexpScript.sml b/basis/pure/mlsexpScript.sml index 836c510ee6..bd2a934eca 100644 --- a/basis/pure/mlsexpScript.sml +++ b/basis/pure/mlsexpScript.sml @@ -345,7 +345,7 @@ Definition sexp_to_app_list_def: End Definition sexp_to_string_def: - sexp_to_string s = concat (append (Append (sexp_to_app_list s) (List [«\n»]))) + sexp_to_string s = concat (append (sexp_to_app_list s)) End Definition sexp_to_pretty_string_def: @@ -388,3 +388,64 @@ Proof CONV_TAC (PATH_CONV "lrl" (ONCE_REWRITE_CONV [GSYM APPEND_NIL |> cj 1])) \\ rewrite_tac [parse_aux_to_tokens_lemma] \\ fs [parse_aux_def] QED + +Theorem lex_aux_make_str_safe: + ∀m s ts depth. + (s ≠ "" ⇒ isSpace (HD s) ∨ HD s = #")") ⇒ + lex_aux depth (STRCAT (case make_str_safe m of strlit x => x) s) ts = + if depth = 0 then INR (SYMBOL m::ts,s) + else lex_aux depth s (SYMBOL m::ts) +Proof + cheat +QED + +Theorem lex_aux_sexp_to_list: + (∀x s ts depth. + (s ≠ "" ⇒ isSpace (HD s) ∨ HD s = #")") ⇒ + lex_aux depth (explode (concat (append (sexp_to_app_list x))) ++ s) ts = + if depth = 0 then + INR (REVERSE (to_tokens x) ++ ts, s) + else + lex_aux depth s (REVERSE (to_tokens x) ++ ts)) ∧ + (∀xs s ts depth. + (s ≠ "" ⇒ isSpace (HD s) ∨ HD s = #")") ∧ depth ≠ 0 ⇒ + lex_aux depth (explode (concat (append (sexps_to_app_list xs))) ++ s) ts = + lex_aux depth s (REVERSE (FLAT (MAP (λx. to_tokens x) xs)) ++ ts)) +Proof + Induct + >- + (fs [to_tokens_def] + \\ simp [sexp_to_app_list_def,concat_def] + \\ rpt strip_tac + \\ irule lex_aux_make_str_safe \\ simp []) + >- + (simp [sexp_to_app_list_def,concat_def] \\ fs [concat_def] + \\ rw [] \\ simp [Once lex_aux_def, EVAL “isSpace #"("”] + \\ simp_tac std_ss [APPEND,GSYM APPEND_ASSOC] + \\ last_x_assum $ DEP_REWRITE_TAC o single + \\ fs [] \\ simp [Once lex_aux_def, EVAL “isSpace #")"”] + \\ simp [to_tokens_def] \\ fs [REVERSE_APPEND] + \\ simp_tac std_ss [APPEND,GSYM APPEND_ASSOC]) + >- + (simp [sexp_to_app_list_def]) + \\ simp [sexp_to_app_list_def] \\ rw [] + >- gvs [NULL_EQ] + \\ fs [concat_def] + \\ simp_tac std_ss [APPEND,GSYM APPEND_ASSOC] + \\ last_x_assum $ DEP_REWRITE_TAC o single + \\ fs [EVAL “isSpace #" "”] + \\ simp [Once lex_aux_def, EVAL “isSpace #" "”] + \\ fs [REVERSE_APPEND] + \\ simp_tac std_ss [APPEND,GSYM APPEND_ASSOC] +QED + +Theorem parse_sexp_to_string: + ∀s x. + (s ≠ "" ⇒ isSpace (HD s)) ⇒ + parse (explode (sexp_to_string x) ++ s) = INR (x, s) +Proof + fs [sexp_to_string_def,parse_def,lex_def] + \\ rpt strip_tac + \\ DEP_REWRITE_TAC [cj 1 lex_aux_sexp_to_list] \\ fs [] + \\ simp [parse_aux_to_tokens_thm] +QED From ea6496cfd1f20e3f570b331eaecf33169c7bedd8 Mon Sep 17 00:00:00 2001 From: Magnus Myreen Date: Sat, 13 Dec 2025 23:30:24 +0100 Subject: [PATCH 29/42] More proofs about parsing printing of sexp --- basis/pure/mlsexpScript.sml | 153 ++++++++++++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) diff --git a/basis/pure/mlsexpScript.sml b/basis/pure/mlsexpScript.sml index bd2a934eca..46bd4297a7 100644 --- a/basis/pure/mlsexpScript.sml +++ b/basis/pure/mlsexpScript.sml @@ -399,6 +399,132 @@ Proof cheat QED +Theorem flatten_acc: + ∀x indent s. flatten indent x s = flatten indent x [] ++ s +Proof + Induct + \\ once_rewrite_tac [flatten_def] + \\ simp_tac std_ss [] + \\ rpt $ pop_assum $ once_rewrite_tac o single + \\ fs [] +QED + +Theorem lex_aux_spaces: + ∀t rest ts depth. + EVERY isSpace t ⇒ + lex_aux depth (t ++ rest) ts = lex_aux depth rest ts +Proof + Induct \\ rw [] \\ simp [Once lex_aux_def] +QED + +Theorem lex_aux_sexp2tree: + (∀x s ts depth m n indent. + (s ≠ "" ⇒ isSpace (HD s) ∨ HD s = #")") ∧ + indent ≠ strlit "" ∧ EVERY isSpace (explode indent) ⇒ + lex_aux depth + (STRCAT + (explode + (concat + (flatten indent + (smart_remove m n (annotate (v2pretty (sexp2tree x)))) + []))) s) ts = + (if depth = 0 then INR (REVERSE (to_tokens x) ++ ts,s) + else lex_aux depth s (REVERSE (to_tokens x) ++ ts)) ∧ + lex_aux depth + (STRCAT + (explode + (concat + (flatten indent + (remove_all (annotate (v2pretty (sexp2tree x)))) + []))) s) ts = + (if depth = 0 then INR (REVERSE (to_tokens x) ++ ts,s) + else lex_aux depth s (REVERSE (to_tokens x) ++ ts))) ∧ + (∀xs s ts depth indent m n. + (s ≠ "" ⇒ isSpace (HD s) ∨ HD s = #")") ∧ depth ≠ 0 ∧ + indent ≠ strlit "" ∧ EVERY isSpace (explode indent) ⇒ + lex_aux depth + (STRCAT + (explode + (concat + (flatten indent + (smart_remove m n (annotate (newlines (vs2pretty (sexp2trees xs))))) + []))) s) ts = + (lex_aux depth s (REVERSE (FLAT (MAP to_tokens xs)) ++ ts)) ∧ + lex_aux depth + (STRCAT + (explode + (concat + (flatten indent + (remove_all (annotate (newlines (vs2pretty (sexp2trees xs))))) + []))) s) ts = + (lex_aux depth s (REVERSE (FLAT (MAP to_tokens xs)) ++ ts))) +Proof + Induct + >- + (simp [sexp2tree_def] + \\ simp [v2strs_def,v2pretty_def] + \\ simp [annotate_def,to_tokens_def] + \\ simp [smart_remove_def,remove_all_def, flatten_def] + \\ rpt strip_tac + \\ DEP_REWRITE_TAC [GSYM lex_aux_make_str_safe] + \\ Cases_on ‘make_str_safe m’ \\ simp []) + >- + (rpt gen_tac \\ strip_tac + \\ simp [sexp2tree_def] + \\ once_rewrite_tac [v2pretty_def] + \\ simp [annotate_def,remove_all_def,smart_remove_def] + \\ conj_tac + THENL [IF_CASES_TAC, all_tac] + \\ simp [flatten_def,concat_def] + \\ Cases_on ‘indent’ \\ gvs [concat_def] + \\ simp [Once lex_aux_def, EVAL “isSpace #"("”] + \\ once_rewrite_tac [flatten_acc] \\ fs [] + \\ simp_tac std_ss [APPEND,GSYM APPEND_ASSOC] + \\ gvs [SF DNF_ss] + \\ qabbrev_tac ‘new_indent = strlit (STRCAT s' " ")’ + \\ rpt $ first_x_assum $ qspecl_then [‘")"++s’, + ‘OPEN::ts’,‘depth + 1’,‘new_indent’] mp_tac + \\ ‘new_indent ≠ «» ∧ EVERY isSpace (explode new_indent)’ by cheat + \\ simp [] + \\ simp_tac std_ss [APPEND,GSYM APPEND_ASSOC] + \\ rpt strip_tac + \\ simp [Once lex_aux_def,EVAL “isSpace #")"”] + \\ simp [to_tokens_def,REVERSE_APPEND] + \\ simp_tac std_ss [APPEND,GSYM APPEND_ASSOC, SF ETA_ss]) + >- + (simp [sexp2tree_def] + \\ simp [v2pretty_def] + \\ simp [newlines_def,annotate_def,smart_remove_def, + remove_all_def,flatten_def,concat_def]) + \\ Cases_on ‘xs’ \\ fs [] + \\ simp [REVERSE_APPEND,sexp2tree_def] + \\ once_rewrite_tac [v2pretty_def |> cj 2] \\ simp [] + \\ once_rewrite_tac [v2pretty_def |> cj 2] \\ simp [] + \\ simp [newlines_def] + \\ simp [annotate_def,remove_all_def,smart_remove_def] + \\ rpt gen_tac + \\ strip_tac + \\ conj_tac + THENL [IF_CASES_TAC, all_tac] + \\ simp [Once flatten_def] + \\ once_rewrite_tac [flatten_acc] + \\ gvs [concat_def] + \\ simp_tac std_ss [APPEND,GSYM APPEND_ASSOC] + \\ gvs [SF DNF_ss] + \\ simp_tac std_ss [APPEND,GSYM APPEND_ASSOC] + \\ last_x_assum $ DEP_REWRITE_TAC o single + \\ last_x_assum $ DEP_REWRITE_TAC o single + \\ simp [] + \\ Cases_on ‘indent’ + \\ rename [‘EVERY isSpace (explode (strlit t))’] + \\ (conj_tac >- (gvs [EVAL “isSpace #" "”] \\ Cases_on ‘t’ \\ fs [])) + \\ simp [Once lex_aux_def, EVAL “isSpace #" "”] + \\ simp_tac std_ss [APPEND,GSYM APPEND_ASSOC] + \\ gvs [sexp2tree_def,lex_aux_spaces] + \\ gvs [v2pretty_def |> cj 2 |> Q.SPEC ‘_ :: _’ |> SRULE []] + \\ simp [REVERSE_APPEND] +QED + Theorem lex_aux_sexp_to_list: (∀x s ts depth. (s ≠ "" ⇒ isSpace (HD s) ∨ HD s = #")") ⇒ @@ -449,3 +575,30 @@ Proof \\ DEP_REWRITE_TAC [cj 1 lex_aux_sexp_to_list] \\ fs [] \\ simp [parse_aux_to_tokens_thm] QED + +Theorem parse_sexp_to_pretty_string: + ∀s x. + parse (explode (sexp_to_pretty_string x) ++ s) = INR (x, "\n" ++ s) +Proof + fs [sexp_to_pretty_string_def,parse_def,lex_def,v2strs_def] + \\ rpt strip_tac + \\ once_rewrite_tac [flatten_acc] \\ simp [] + \\ gvs [concat_append] + \\ simp_tac std_ss [APPEND,GSYM APPEND_ASSOC] + \\ DEP_REWRITE_TAC [lex_aux_sexp2tree |> cj 1 |> cj 1] + \\ simp [parse_aux_to_tokens_thm] \\ EVAL_TAC +QED + +Theorem fromString_sexp_to_string: + fromString (sexp_to_string x) = SOME x +Proof + fs [fromString_def,AllCaseEqs(),PULL_EXISTS] + \\ qspec_then ‘[]’ mp_tac parse_sexp_to_string \\ fs [] +QED + +Theorem fromString_sexp_to_pretty_string: + fromString (sexp_to_pretty_string x) = SOME x +Proof + fs [fromString_def,AllCaseEqs(),PULL_EXISTS] + \\ qspec_then ‘[]’ mp_tac parse_sexp_to_pretty_string \\ fs [] +QED From ed4bbe1432cbdf025c86a1b060ce786b1232a8a7 Mon Sep 17 00:00:00 2001 From: Magnus Myreen Date: Sun, 14 Dec 2025 09:22:40 +0100 Subject: [PATCH 30/42] Finish sexp parsing printing proofs --- basis/pure/mlsexpScript.sml | 88 ++++++++++++++++++++++++++++++++----- 1 file changed, 77 insertions(+), 11 deletions(-) diff --git a/basis/pure/mlsexpScript.sml b/basis/pure/mlsexpScript.sml index 46bd4297a7..9990eeb4ae 100644 --- a/basis/pure/mlsexpScript.sml +++ b/basis/pure/mlsexpScript.sml @@ -37,11 +37,12 @@ Definition read_string_aux_def: (case rest of | [] => read_string_aux rest acc (* causes error: unterminated string *) | (e::rest) => - (if e = #"\\" then read_string_aux rest (#"\""::acc) else + (if e = #"\\" then read_string_aux rest (#"\\"::acc) else + if e = #"\"" then read_string_aux rest (#"\""::acc) else if e = #"0" then read_string_aux rest (#"\000"::acc) else if e = #"n" then read_string_aux rest (#"\n"::acc) else - if e = #"r" then read_string_aux rest (#"\""::acc) else - if e = #"t" then read_string_aux rest (#"\""::acc) else + if e = #"r" then read_string_aux rest (#"\r"::acc) else + if e = #"t" then read_string_aux rest (#"\t"::acc) else INL («read_string_aux: unrecognised escape», rest))) else read_string_aux rest (c::acc) @@ -311,7 +312,7 @@ QED *--------------------------------------------------------------*) Definition is_safe_char_def: - is_safe_char c ⇔ ~MEM c " \t\n()\"\000" + is_safe_char c ⇔ ~MEM c "()\"\000" ∧ ¬isSpace c End Definition str_every_def: @@ -322,6 +323,7 @@ End Definition make_str_safe_def: make_str_safe s = + if s = strlit "" then strlit "\"\"" else if str_every is_safe_char (strlen s) s then s else escape_str s End @@ -389,14 +391,72 @@ Proof \\ rewrite_tac [parse_aux_to_tokens_lemma] \\ fs [parse_aux_def] QED +Theorem str_every_thm: + ∀P i m. str_every P i m ∧ i ≤ strlen m ⇒ ∀k. k < i ⇒ P $ EL k (explode m) +Proof + Induct_on ‘i’\\ fs [] + \\ simp [Once str_every_def] \\ rw [] + \\ last_x_assum drule \\ gvs [] + \\ ‘k = i ∨ k < i’ by decide_tac + \\ fs [] \\ Cases_on ‘m’ \\ gvs [strsub_def] +QED + +Theorem read_symbol_thm: + ∀t s acc. + EVERY is_safe_char t ∧ (s ≠ "" ⇒ isSpace (HD s) ∨ HD s = #")") ⇒ + read_symbol (t ++ s) acc = (implode (REVERSE acc ++ t), s) +Proof + Induct + >- (rw [] \\ Cases_on ‘s’ \\ gvs [read_symbol_def]) + \\ gvs [] \\ rw [] + \\ gvs [read_symbol_def] + \\ simp_tac std_ss [GSYM APPEND_ASSOC,APPEND, AllCaseEqs()] + \\ gvs [is_safe_char_def] +QED + +Theorem read_string_aux_thm: + ∀xs s acc. + read_string_aux (STRCAT (STRCAT (CONCAT (MAP char_escaped xs)) "\"") s) acc = + INR (implode (REVERSE acc ++ xs), s) +Proof + Induct + \\ simp [read_string_def,read_string_aux_def] + \\ gvs [char_escaped_def,char_escape_seq_def] + \\ rw [] \\ gvs [] + \\ gvs [read_string_aux_def] + \\ simp_tac std_ss [GSYM APPEND_ASSOC,APPEND] +QED + Theorem lex_aux_make_str_safe: ∀m s ts depth. - (s ≠ "" ⇒ isSpace (HD s) ∨ HD s = #")") ⇒ + (s ≠ "" ⇒ isSpace (HD s) ∨ HD s = #")") ∧ + (depth = 0 ⇒ ts = []) ⇒ lex_aux depth (STRCAT (case make_str_safe m of strlit x => x) s) ts = if depth = 0 then INR (SYMBOL m::ts,s) else lex_aux depth s (SYMBOL m::ts) Proof - cheat + rpt gen_tac + \\ simp [make_str_safe_def] + \\ IF_CASES_TAC \\ fs [] + >- + (simp [Once lex_aux_def, EVAL “isSpace #"\""”] + \\ simp [Once read_string_def] + \\ simp [Once read_string_aux_def, implode_def]) + \\ IF_CASES_TAC \\ fs [] + >- + (dxrule str_every_thm \\ simp [] + \\ Cases_on ‘m’ \\ gvs [] + \\ rpt strip_tac + \\ rename [‘lex_aux depth (STRCAT input s) ts’] + \\ ‘EVERY is_safe_char input’ by gvs [EVERY_EL] + \\ qpat_x_assum ‘∀_._’ kall_tac + \\ Cases_on ‘input’ \\ gvs [] + \\ gvs [lex_aux_def,is_safe_char_def] + \\ DEP_REWRITE_TAC [read_symbol_thm] + \\ simp [implode_def]) + \\ simp [escape_str_def,implode_def] + \\ simp [Once lex_aux_def, EVAL “isSpace #"\""”] + \\ simp [read_string_def,read_string_aux_thm] QED Theorem flatten_acc: @@ -420,6 +480,7 @@ QED Theorem lex_aux_sexp2tree: (∀x s ts depth m n indent. (s ≠ "" ⇒ isSpace (HD s) ∨ HD s = #")") ∧ + (depth = 0 ⇒ ts = []) ∧ indent ≠ strlit "" ∧ EVERY isSpace (explode indent) ⇒ lex_aux depth (STRCAT @@ -466,8 +527,9 @@ Proof \\ simp [annotate_def,to_tokens_def] \\ simp [smart_remove_def,remove_all_def, flatten_def] \\ rpt strip_tac - \\ DEP_REWRITE_TAC [GSYM lex_aux_make_str_safe] - \\ Cases_on ‘make_str_safe m’ \\ simp []) + \\ drule_all lex_aux_make_str_safe + \\ Cases_on ‘make_str_safe m’ \\ simp [] + \\ disch_then $ qspec_then ‘m’ mp_tac \\ simp []) >- (rpt gen_tac \\ strip_tac \\ simp [sexp2tree_def] @@ -484,7 +546,8 @@ Proof \\ qabbrev_tac ‘new_indent = strlit (STRCAT s' " ")’ \\ rpt $ first_x_assum $ qspecl_then [‘")"++s’, ‘OPEN::ts’,‘depth + 1’,‘new_indent’] mp_tac - \\ ‘new_indent ≠ «» ∧ EVERY isSpace (explode new_indent)’ by cheat + \\ ‘new_indent ≠ «» ∧ EVERY isSpace (explode new_indent)’ by + (gvs [Abbr‘new_indent’] \\ EVAL_TAC) \\ simp [] \\ simp_tac std_ss [APPEND,GSYM APPEND_ASSOC] \\ rpt strip_tac @@ -527,7 +590,8 @@ QED Theorem lex_aux_sexp_to_list: (∀x s ts depth. - (s ≠ "" ⇒ isSpace (HD s) ∨ HD s = #")") ⇒ + (s ≠ "" ⇒ isSpace (HD s) ∨ HD s = #")") ∧ + (depth = 0 ⇒ ts = []) ⇒ lex_aux depth (explode (concat (append (sexp_to_app_list x))) ++ s) ts = if depth = 0 then INR (REVERSE (to_tokens x) ++ ts, s) @@ -543,7 +607,9 @@ Proof (fs [to_tokens_def] \\ simp [sexp_to_app_list_def,concat_def] \\ rpt strip_tac - \\ irule lex_aux_make_str_safe \\ simp []) + \\ drule_all lex_aux_make_str_safe + \\ disch_then $ qspec_then ‘m’ assume_tac + \\ simp []) >- (simp [sexp_to_app_list_def,concat_def] \\ fs [concat_def] \\ rw [] \\ simp [Once lex_aux_def, EVAL “isSpace #"("”] From 9c247f83963032c3975af43ad97dd20b1647d8de Mon Sep 17 00:00:00 2001 From: Magnus Myreen Date: Sun, 14 Dec 2025 09:25:45 +0100 Subject: [PATCH 31/42] Rename v2strs to str_tree_to_strs --- basis/pure/mlsexpScript.sml | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/basis/pure/mlsexpScript.sml b/basis/pure/mlsexpScript.sml index 9990eeb4ae..242accc9b4 100644 --- a/basis/pure/mlsexpScript.sml +++ b/basis/pure/mlsexpScript.sml @@ -284,24 +284,24 @@ Definition flatten_def: flatten indent p1 ((if b then indent else strlit " ") :: flatten indent p2 s) End -Definition v2strs_def: - v2strs end v = flatten (strlit "\n") (smart_remove 0 0 (annotate (v2pretty v))) [end] +Definition str_tree_to_strs_def: + str_tree_to_strs end v = flatten (strlit "\n") (smart_remove 0 0 (annotate (v2pretty v))) [end] End -Theorem test1_v2strs[local]: - concat (v2strs (strlit "") - (Trees [Str (strlit "hello"); - Str (strlit "there")])) = +Theorem test1_str_tree_to_strs[local]: + concat (str_tree_to_strs (strlit "") + (Trees [Str (strlit "hello"); + Str (strlit "there")])) = strlit "(hello there)" Proof EVAL_TAC QED -Theorem test2_v2strs[local]: - concat (v2strs (strlit "") - (Trees [Str (strlit "test"); - GrabLine (Str (strlit "hi")); - GrabLine (Str (strlit "there"))])) = +Theorem test2_str_tree_to_strs[local]: + concat (str_tree_to_strs (strlit "") + (Trees [Str (strlit "test"); + GrabLine (Str (strlit "hi")); + GrabLine (Str (strlit "there"))])) = strlit "(test\n hi\n there)" Proof EVAL_TAC @@ -351,7 +351,7 @@ Definition sexp_to_string_def: End Definition sexp_to_pretty_string_def: - sexp_to_pretty_string s = concat (v2strs (strlit "\n") (sexp2tree s)) + sexp_to_pretty_string s = concat (str_tree_to_strs (strlit "\n") (sexp2tree s)) End (*--------------------------------------------------------------* @@ -523,7 +523,7 @@ Proof Induct >- (simp [sexp2tree_def] - \\ simp [v2strs_def,v2pretty_def] + \\ simp [str_tree_to_strs_def,v2pretty_def] \\ simp [annotate_def,to_tokens_def] \\ simp [smart_remove_def,remove_all_def, flatten_def] \\ rpt strip_tac @@ -646,7 +646,7 @@ Theorem parse_sexp_to_pretty_string: ∀s x. parse (explode (sexp_to_pretty_string x) ++ s) = INR (x, "\n" ++ s) Proof - fs [sexp_to_pretty_string_def,parse_def,lex_def,v2strs_def] + fs [sexp_to_pretty_string_def,parse_def,lex_def,str_tree_to_strs_def] \\ rpt strip_tac \\ once_rewrite_tac [flatten_acc] \\ simp [] \\ gvs [concat_append] From 1716e53363cebc527eae07b6ea9780ae9a0d20f7 Mon Sep 17 00:00:00 2001 From: Daniel Nezamabadi <55559979+dnezam@users.noreply.github.com> Date: Sun, 14 Dec 2025 09:58:27 +0100 Subject: [PATCH 32/42] Fix SexpProg --- basis/SexpProgScript.sml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/basis/SexpProgScript.sml b/basis/SexpProgScript.sml index 2b966a6792..0ef1f66175 100644 --- a/basis/SexpProgScript.sml +++ b/basis/SexpProgScript.sml @@ -75,11 +75,12 @@ Quote add_cakeml: (case TextIO.b_input1 input of None => read_string_aux_imp input acc (* causes error: unterminated string *) | Some e => - (if e = #"\\" then read_string_aux_imp input (#"\""::acc) else + (if e = #"\\" then read_string_aux_imp input (#"\\"::acc) else + if e = #"\"" then read_string_aux_imp input (#"\""::acc) else if e = #"0" then read_string_aux_imp input (#"\000"::acc) else if e = #"n" then read_string_aux_imp input (#"\n"::acc) else - if e = #"r" then read_string_aux_imp input (#"\""::acc) else - if e = #"t" then read_string_aux_imp input (#"\""::acc) else + if e = #"r" then read_string_aux_imp input (#"\r"::acc) else + if e = #"t" then read_string_aux_imp input (#"\t"::acc) else raise Fail "read_string_aux: unrecognised escape")) else read_string_aux_imp input (c::acc) @@ -238,7 +239,7 @@ Proof \\ STDIO_forwardFD_INSTREAM_STR_tac) \\ xmatch (* escape characters *) - \\ ntac 5 ( + \\ ntac 6 ( xlet_autop \\ xif >- From faa9511b09872be4c091f0e53ef628cfd7409476 Mon Sep 17 00:00:00 2001 From: Daniel Nezamabadi <55559979+dnezam@users.noreply.github.com> Date: Sun, 14 Dec 2025 10:05:19 +0100 Subject: [PATCH 33/42] Prove lex_aux precondition in SexpProg --- basis/SexpProgScript.sml | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/basis/SexpProgScript.sml b/basis/SexpProgScript.sml index 0ef1f66175..248be40205 100644 --- a/basis/SexpProgScript.sml +++ b/basis/SexpProgScript.sml @@ -60,7 +60,24 @@ val r = translate mlsexpTheory.parse_aux_def; val r = translate mlsexpTheory.read_string_aux_def; val r = translate mlsexpTheory.read_string_def; val r = translate $ SRULE [isSpace_def] mlsexpTheory.read_symbol_def; -val r = translate $ SRULE [isSpace_def] mlsexpTheory.lex_aux_def; + +val r = translate_no_ind mlsexpTheory.lex_aux_def; + +Theorem lex_aux_ind[local]: + lex_aux_ind +Proof + once_rewrite_tac [fetch "-" "lex_aux_ind_def"] + \\ rpt gen_tac + \\ rpt (disch_then strip_assume_tac) + \\ match_mp_tac (latest_ind ()) + \\ rpt strip_tac + \\ last_x_assum match_mp_tac + \\ rpt strip_tac + \\ gvs [FORALL_PROD] +QED + +val _ = lex_aux_ind |> update_precondition; + val r = translate mlsexpTheory.lex_def; val r = translate mlsexpTheory.parse_def; @@ -386,7 +403,7 @@ Theorem lex_aux_imp_spec: (STDIO fs * INSTREAM_STR fd is s fs) (lex_aux_imp_post depth s acc fs is fd) Proof - ho_match_mp_tac lex_aux_ind + ho_match_mp_tac mlsexpTheory.lex_aux_ind \\ rpt strip_tac \\ qmatch_goalsub_abbrev_tac ‘lex_aux_imp_post _ s₁’ \\ xcf_with_def $ definition "Sexp_lex_aux_imp_v_def" From cc58ae201350e67a09ceef5fcfdf3c937f54be2e Mon Sep 17 00:00:00 2001 From: Magnus Myreen Date: Mon, 15 Dec 2025 11:42:17 +0100 Subject: [PATCH 34/42] Remove str_treeTheory --- basis/SexpProgScript.sml | 51 +++++++- basis/types.txt | 5 +- compiler/backend/README.md | 3 - compiler/backend/displayLangScript.sml | 9 +- compiler/backend/presLangScript.sml | 20 ++-- compiler/backend/str_treeScript.sml | 159 ------------------------- pancake/pan_passesScript.sml | 6 +- 7 files changed, 71 insertions(+), 182 deletions(-) delete mode 100644 compiler/backend/str_treeScript.sml diff --git a/basis/SexpProgScript.sml b/basis/SexpProgScript.sml index 248be40205..7c1fc60a37 100644 --- a/basis/SexpProgScript.sml +++ b/basis/SexpProgScript.sml @@ -27,12 +27,17 @@ val _ = ml_prog_update (open_module "Sexp"); (* Temporarily disable full type names to avoid mlsexp_sexp as the exported type name *) val _ = with_flag (use_full_type_names, false) register_type “:mlsexp$sexp”; +val _ = with_flag (use_full_type_names, false) register_type “:mlsexp$str_tree”; (* Pretty printing function for s-expressions used by the REPL *) Quote add_cakeml: fun pp_sexp se = case se of Atom s => PrettyPrinter.app_block "Atom" [PrettyPrinter.token s] - | Expr ses => PrettyPrinter.app_block "Expr" [PrettyPrinter.pp_list pp_sexp ses] + | Expr ses => PrettyPrinter.app_block "Expr" [PrettyPrinter.pp_list pp_sexp ses]; + fun pp_str_tree se = case se of + Str s => PrettyPrinter.app_block "Str" [PrettyPrinter.token s] + | Grabline s => PrettyPrinter.app_block "Grabline" [pp_str_tree s] + | Trees ses => PrettyPrinter.app_block "Trees" [PrettyPrinter.pp_list pp_str_tree ses]; End (* We will define two functions to generate s-expressions: @@ -161,6 +166,50 @@ Quote add_cakeml: | (v::_) => v End + +val _ = ml_prog_update open_local_block; + +val _ = translate mlsexpTheory.flatten_def; +val _ = translate mlsexpTheory.get_size_def; +val _ = translate mlsexpTheory.get_next_size_def; +val _ = translate mlsexpTheory.remove_all_def; +val _ = translate mlsexpTheory.smart_remove_def; +val _ = translate mlsexpTheory.annotate_def; +val _ = translate mlsexpTheory.newlines_def; +val _ = translate mlsexpTheory.v2pretty_def; +val _ = translate mlsexpTheory.str_every_def; +val _ = translate (mlsexpTheory.is_safe_char_def |> SRULE []); +val _ = translate mlsexpTheory.make_str_safe_def; + +Theorem str_every_side: + ∀x n P. n ≤ strlen x ⇒ str_every_side P n x +Proof + Cases \\ fs [] \\ Induct \\ fs [] + \\ simp [Once (fetch "-" "str_every_side_def")] +QED + +Theorem make_str_safe_side[local]: + ∀x. make_str_safe_side x = T +Proof + gvs [fetch "-" "make_str_safe_side_def"] \\ rw [] + \\ irule_at Any str_every_side \\ fs [] +QED + +val _ = update_precondition make_str_safe_side; + +val _ = translate mlsexpTheory.sexp_to_app_list_def; +val _ = translate mlsexpTheory.sexp2tree_def; + +val _ = ml_prog_update open_local_in_block; + +val _ = next_ml_names := ["str_tree_to_strings"]; +val _ = translate str_tree_to_strs_def; + +val _ = next_ml_names := ["toString"]; +val _ = translate mlsexpTheory.sexp_to_string_def; +val _ = next_ml_names := ["toPrettyString"]; +val _ = translate mlsexpTheory.sexp_to_pretty_string_def; + val _ = ml_prog_update close_local_blocks; val _ = ml_prog_update (close_module NONE); diff --git a/basis/types.txt b/basis/types.txt index 35a518d1d5..267f7426f2 100644 --- a/basis/types.txt +++ b/basis/types.txt @@ -389,8 +389,12 @@ TextIO.foldChars: (char -> 'a -> 'a) -> 'a -> string option -> 'a option TextIO.foldLines: char -> (string -> 'a -> 'a) -> 'a -> string option -> 'a option TextIO.foldTokens: char -> (char -> bool) -> (string -> 'a) -> ('a list -> 'b -> 'b) -> 'b -> string option -> 'b option Sexp.pp_sexp: Sexp.sexp -> PrettyPrinter.pp_data +Sexp.pp_str_tree: Sexp.str_tree -> PrettyPrinter.pp_data Sexp.fromString: string -> Sexp.sexp option Sexp.b_inputSexp: TextIO.b_instream -> Sexp.sexp +Sexp.str_tree_to_strings: string -> Sexp.str_tree -> string list +Sexp.toString: Sexp.sexp -> string +Sexp.toPrettyString: Sexp.sexp -> string fst: 'a * 'b -> 'a snd: 'a * 'b -> 'b curry: ('a * 'b -> 'c) -> 'a -> 'b -> 'c @@ -452,4 +456,3 @@ print: string -> unit print_app_list: string app_list -> unit print_int: int -> unit print_pp: PrettyPrinter.pp_data -> unit - diff --git a/compiler/backend/README.md b/compiler/backend/README.md index e2796f55de..5bafdd772f 100644 --- a/compiler/backend/README.md +++ b/compiler/backend/README.md @@ -314,9 +314,6 @@ This compiler phase maps stackLang programs, which has structure such as If, While, Return etc, to labLang programs that are a soup of goto-like jumps. -[str_treeScript.sml](str_treeScript.sml): -A Lisp inspired tree of mlstrings and a pretty printing function - [wordLangScript.sml](wordLangScript.sml): The wordLang intermediate language consists of structured programs that overate over machine words, a list-like stack and a flat memory. diff --git a/compiler/backend/displayLangScript.sml b/compiler/backend/displayLangScript.sml index a4155fa803..ce81432475 100644 --- a/compiler/backend/displayLangScript.sml +++ b/compiler/backend/displayLangScript.sml @@ -6,7 +6,7 @@ *) Theory displayLang Ancestors - jsonLang mlint backend_common str_tree + jsonLang mlint backend_common mlsexp (* for str_tree *) Libs preamble @@ -68,16 +68,15 @@ End Definition display_to_str_tree_def: (display_to_str_tree (Item tra name es) = - mk_list (Str name :: display_to_str_tree_list es)) ∧ + Trees (Str name :: display_to_str_tree_list es)) ∧ (display_to_str_tree (String s : sExp) = Str s) /\ (display_to_str_tree (Tuple es) = if NULL es then Str (strlit "()") - else mk_list (display_to_str_tree_list es)) ∧ + else Trees (display_to_str_tree_list es)) ∧ (display_to_str_tree (List es) = if NULL es then Str (strlit "()") - else mk_list (MAP GrabLine (display_to_str_tree_list es))) ∧ + else Trees (MAP GrabLine (display_to_str_tree_list es))) ∧ (display_to_str_tree_list [] = []) ∧ (display_to_str_tree_list (x::xs) = display_to_str_tree x :: display_to_str_tree_list xs) End - diff --git a/compiler/backend/presLangScript.sml b/compiler/backend/presLangScript.sml index e4ee08baf8..baa418991f 100644 --- a/compiler/backend/presLangScript.sml +++ b/compiler/backend/presLangScript.sml @@ -1524,14 +1524,14 @@ End Definition source_to_strs_def: source_to_strs decs = - map_to_append (v2strs (strlit "\n\n") o + map_to_append (str_tree_to_strs (strlit "\n\n") o display_to_str_tree o source_to_display_dec) decs End Definition flat_to_strs_def: flat_to_strs (decs:flatLang$dec list) = - map_to_append (v2strs (strlit "\n\n") o + map_to_append (str_tree_to_strs (strlit "\n\n") o display_to_str_tree o flat_to_display_dec) decs End @@ -1539,17 +1539,17 @@ End Definition clos_to_strs_def: clos_to_strs (decs,funs) = let names = clos_to_bvl$get_src_names (decs ++ MAP (SND o SND) funs) LN in - Append (map_to_append (v2strs (strlit "\n\n") o + Append (map_to_append (str_tree_to_strs (strlit "\n\n") o display_to_str_tree o clos_dec_to_display names) decs) - (map_to_append (v2strs (strlit "\n\n") o + (map_to_append (str_tree_to_strs (strlit "\n\n") o display_to_str_tree o clos_fun_to_display names) funs) End Definition bvl_to_strs_def: bvl_to_strs names xs = - map_to_append (v2strs (strlit "\n\n") o + map_to_append (str_tree_to_strs (strlit "\n\n") o display_to_str_tree o bvl_fun_to_display names) xs End @@ -1566,7 +1566,7 @@ val bvl_test = Definition bvi_to_strs_def: bvi_to_strs names xs = - map_to_append (v2strs (strlit "\n\n") o + map_to_append (str_tree_to_strs (strlit "\n\n") o display_to_str_tree o bvi_fun_to_display names) xs End @@ -1583,7 +1583,7 @@ val bvi_test = Definition data_to_strs_def: data_to_strs names xs = - map_to_append (v2strs (strlit "\n\n") o + map_to_append (str_tree_to_strs (strlit "\n\n") o display_to_str_tree o data_fun_to_display names) xs End @@ -1600,21 +1600,21 @@ val data_test = Definition word_to_strs_def: word_to_strs names xs = - map_to_append (v2strs (strlit "\n\n") o + map_to_append (str_tree_to_strs (strlit "\n\n") o display_to_str_tree o word_fun_to_display names) xs End Definition stack_to_strs_def: stack_to_strs names xs = - map_to_append (v2strs (strlit "\n\n") o + map_to_append (str_tree_to_strs (strlit "\n\n") o display_to_str_tree o stack_fun_to_display names) xs End Definition lab_to_strs_def: lab_to_strs names xs = - map_to_append (v2strs (strlit "\n\n") o + map_to_append (str_tree_to_strs (strlit "\n\n") o display_to_str_tree o lab_fun_to_display names) xs End diff --git a/compiler/backend/str_treeScript.sml b/compiler/backend/str_treeScript.sml deleted file mode 100644 index a25313f484..0000000000 --- a/compiler/backend/str_treeScript.sml +++ /dev/null @@ -1,159 +0,0 @@ -(* - A Lisp inspired tree of mlstrings and a pretty printing function -*) -Theory str_tree -Ancestors - arithmetic list pair mlstring - -(* datatype and helper functions *) - -Datatype: - str_tree = Str mlstring | Pair str_tree str_tree | GrabLine str_tree -End - -Definition isPair_def[simp]: - isPair (Pair _ _) = T ∧ - isPair _ = F -End - -Definition mk_list_def: - mk_list [] = Str (strlit "") ∧ - mk_list (x::xs) = Pair x (mk_list xs) -End - -(* pretty printing *) - -Definition dest_list_def: - dest_list (Pair x y) = (let (l,e) = dest_list y in (x::l,e)) ∧ - dest_list other = ([],other) -End - -Theorem dest_list_size[local]: - ∀v e l. - (l,e) = dest_list v ⇒ - str_tree_size e ≤ str_tree_size v ∧ - (isPair v ⇒ str_tree_size e < str_tree_size v) ∧ - ∀x. MEM x l ⇒ str_tree_size x < str_tree_size v -Proof - Induct_on ‘v’ \\ fs [dest_list_def] - \\ pairarg_tac \\ fs [] \\ rw [] \\ res_tac \\ fs [fetch "-" "str_tree_size_def"] -QED - -Datatype: - pretty = Parenthesis pretty - | String mlstring - | Append pretty bool pretty - | Size num pretty -End - -Definition newlines_def: - newlines [] = String (strlit "") ∧ - newlines [x] = x ∧ - newlines (x::xs) = Append x T (newlines xs) -End - -Definition v2pretty_def: - v2pretty v = - case v of - | Str s => String s - | GrabLine w => Size 100000 (v2pretty w) - | _ => - let (l,e) = dest_list v in - Parenthesis - (if e = Str (strlit "") then - newlines (MAP v2pretty l) - else - Append (newlines (MAP v2pretty l)) T - (Append (String (strlit " . ")) T (v2pretty e))) -Termination - WF_REL_TAC ‘measure str_tree_size’ \\ rw [] - \\ imp_res_tac dest_list_size \\ fs [fetch "-" "str_tree_size_def"] -End - -Definition get_size_def: - get_size (Size n x) = n ∧ - get_size (Append x _ y) = get_size x + get_size y + 1 ∧ - get_size (Parenthesis x) = get_size x + 2 ∧ - get_size _ = 0 -End - -Definition get_next_size_def: - get_next_size (Size n x) = n ∧ - get_next_size (Append x _ y) = get_next_size x ∧ - get_next_size (Parenthesis x) = get_next_size x + 2 ∧ - get_next_size _ = 0 -End - -Definition annotate_def: - annotate (String s) = Size (strlen s) (String s) ∧ - annotate (Parenthesis b) = - (let b = annotate b in - Size (get_size b + 2) (Parenthesis b)) ∧ - annotate (Append b1 n b2) = - (let b1 = annotate b1 in - let b2 = annotate b2 in - (* Size (get_size b1 + get_size b2 + 1) *) (Append b1 n b2)) ∧ - annotate (Size n b) = Size n (annotate b) -End - -Definition remove_all_def: - remove_all (Parenthesis v) = Parenthesis (remove_all v) ∧ - remove_all (String v1) = String v1 ∧ - remove_all (Append v2 _ v3) = Append (remove_all v2) F (remove_all v3) ∧ - remove_all (Size v4 v5) = remove_all v5 -End - -Definition smart_remove_def: - smart_remove i k (Size n b) = - (if k + n < 70 then remove_all b else smart_remove i k b) ∧ - smart_remove i k (Parenthesis v) = Parenthesis (smart_remove (i+1) (k+1) v) ∧ - smart_remove i k (String v1) = String v1 ∧ - smart_remove i k (Append v2 b v3) = - let n2 = get_size v2 in - let n3 = get_next_size v3 in - if k + n2 + n3 < 50 then - Append (smart_remove i k v2) F (smart_remove i (k+n2) v3) - else - Append (smart_remove i k v2) T (smart_remove i i v3) -End - -Definition flatten_def: - flatten indent (Size n p) s = flatten indent p s ∧ - flatten indent (Parenthesis p) s = - strlit "(" :: flatten (concat [indent; strlit " "]) p (strlit ")" :: s) ∧ - flatten indent (String t) s = t :: s ∧ - flatten indent (Append p1 b p2) s = - flatten indent p1 ((if b then indent else strlit " ") :: flatten indent p2 s) -End - -Definition v2strs_def: - v2strs end v = flatten (strlit "\n") (smart_remove 0 0 (annotate (v2pretty v))) [end] -End - -Theorem test1[local]: - concat (v2strs (strlit "") - (Pair (Str (strlit "hello")) - (Pair (Str (strlit "there")) (Str (strlit ""))))) = - strlit "(hello there)" -Proof - EVAL_TAC -QED - -Theorem test2[local]: - concat (v2strs (strlit "") - (mk_list (Str (strlit "test") :: - MAP GrabLine [Str (strlit "hi"); Str (strlit "there")]))) = - strlit "(test\n hi\n there)" -Proof - EVAL_TAC -QED - -(* -Definition vs2str_def: - vs2str [] coms = "\n" ∧ - vs2str (x::xs) coms = - ((case coms of [] => [] | (c::cs) => if is_comment c then c else []) ++ - ("\n" ++ (v2str x ++ ("\n" ++ vs2str xs (case coms of [] => [] | c::cs => cs))))) -End -*) - diff --git a/pancake/pan_passesScript.sml b/pancake/pan_passesScript.sml index d4e4491269..af01ab2eeb 100644 --- a/pancake/pan_passesScript.sml +++ b/pancake/pan_passesScript.sml @@ -318,7 +318,7 @@ End Definition pan_to_strs_def: pan_to_strs xs = map_to_append - (v2strs «\n\n» ∘ display_to_str_tree ∘ pan_fun_to_display) xs + (str_tree_to_strs «\n\n» ∘ display_to_str_tree ∘ pan_fun_to_display) xs End (* crep *) @@ -476,7 +476,7 @@ End Definition crep_to_strs_def: crep_to_strs xs = map_to_append - (v2strs «\n\n» ∘ display_to_str_tree ∘ crep_fun_to_display) xs + (str_tree_to_strs «\n\n» ∘ display_to_str_tree ∘ crep_fun_to_display) xs End (* loop *) @@ -626,7 +626,7 @@ End Definition loop_to_strs_def: loop_to_strs names xs = map_to_append - (v2strs «\n\n» ∘ display_to_str_tree ∘ loop_fun_to_display names) xs + (str_tree_to_strs «\n\n» ∘ display_to_str_tree ∘ loop_fun_to_display names) xs End Definition any_pan_prog_pp_def: From 562c19aa34bd5a578e3e7d758441f217558da864 Mon Sep 17 00:00:00 2001 From: Magnus Myreen Date: Tue, 16 Dec 2025 13:30:43 +0100 Subject: [PATCH 35/42] Minor fixes --- basis/pure/basis_cvScript.sml | 5 +- cv_translator/to_data_cvScript.sml | 92 ------------------------------ developers/bin/README.md | 11 ---- pancake/semantics/README.md | 12 ++-- 4 files changed, 10 insertions(+), 110 deletions(-) diff --git a/basis/pure/basis_cvScript.sml b/basis/pure/basis_cvScript.sml index 6655c32066..5183eb8245 100644 --- a/basis/pure/basis_cvScript.sml +++ b/basis/pure/basis_cvScript.sml @@ -3,7 +3,7 @@ *) Theory basis_cv[no_sig_docs] Ancestors - cv_std + mlsexp cv_std Libs preamble cv_transLib @@ -51,3 +51,6 @@ QED val _ = cv_trans (mlintTheory.toString_def |> SRULE [Num_ABS]); val _ = cv_trans mlintTheory.num_to_str_def; +val _ = cv_auto_trans (mlsexpTheory.smart_remove_def |> SRULE [GSYM GREATER_DEF]); +val _ = cv_auto_trans mlsexpTheory.v2pretty_def; +val _ = cv_auto_trans mlsexpTheory.str_tree_to_strs_def; diff --git a/cv_translator/to_data_cvScript.sml b/cv_translator/to_data_cvScript.sml index 8148098c44..65ec98a67d 100644 --- a/cv_translator/to_data_cvScript.sml +++ b/cv_translator/to_data_cvScript.sml @@ -2727,97 +2727,6 @@ QED val _ = cv_auto_trans (to_data_all_def |> REWRITE_RULE [bvi_to_data_compile_sing]); (* Explorer *) -val _ = cv_auto_trans (str_treeTheory.smart_remove_def |> SRULE [GSYM GREATER_DEF]); - -Theorem dest_list_size_lemma[local]: - ∀x v w. - (v,w) = dest_list x ⇒ - list_size str_tree_size v + str_tree_size w ≤ str_tree_size x -Proof - Induct \\ gvs [str_treeTheory.dest_list_def] - \\ gvs [str_treeTheory.str_tree_size_def,list_size_def] - \\ pairarg_tac \\ gvs [str_treeTheory.str_tree_size_def,list_size_def] -QED - -Definition v2pretty_sing_def: - v2pretty_sing v = - (case v of - | Str s => String s - | GrabLine w => Size 100000 (v2pretty_sing w) - | Pair h t => let (rest,e) = dest_list t in - Parenthesis - (if e = Str «» then - newlines (v2pretty_sing h :: v2pretty_list rest) - else - Append (newlines (v2pretty_sing h :: v2pretty_list rest)) T - (Append (String « . ») T (v2pretty_sing e)))) ∧ - v2pretty_list [] = [] ∧ - v2pretty_list (x::xs) = v2pretty_sing x :: v2pretty_list xs -Termination - WF_REL_TAC ‘measure $ λx. case x of - | INL e => str_tree$str_tree_size e - | INR e => list_size str_tree$str_tree_size e’ - \\ rw [] \\ gvs [str_treeTheory.dest_list_def] - \\ imp_res_tac dest_list_size_lemma - \\ gvs [str_treeTheory.str_tree_size_def,list_size_def] -End - -Theorem v2pretty_eq_v2pretty_sing: - (∀v. v2pretty v = v2pretty_sing v) ∧ - (∀v. MAP v2pretty v = v2pretty_list v) -Proof - ho_match_mp_tac v2pretty_sing_ind \\ rpt strip_tac - \\ once_rewrite_tac [v2pretty_sing_def] \\ fs [] - \\ simp [Once str_treeTheory.v2pretty_def] - \\ TOP_CASE_TAC \\ gvs[] - \\ pairarg_tac \\ gvs [] \\ rw [SF ETA_ss] - \\ pairarg_tac >> gvs[str_treeTheory.dest_list_def] -QED - -val _ = cv_trans str_treeTheory.dest_list_def; - -val cv_str_tree_dest_list_def = fetch "-" "cv_str_tree_dest_list_def"; - -Theorem cv_size_cv_fst_snd: - cv_size (cv_fst z) + cv_size (cv_snd z) ≤ cv_size z -Proof - Cases_on`z`>>cv_termination_tac -QED - -Theorem cv_str_tree_dest_list_size[local]: - ∀v x1 x2. - cv_str_tree_dest_list v = cv$Pair x1 x2 ⇒ - cv_size x1 < cv_size v ∧ - cv_size x2 ≤ cv_size v -Proof - ho_match_mp_tac (fetch "-" "cv_str_tree_dest_list_ind") - \\ rw[] - \\ pop_assum mp_tac - \\ simp [Once cv_str_tree_dest_list_def] - \\ rw[] - \\ cv_termination_tac - \\ Cases_on`k` \\ gvs[] - \\ assume_tac cv_size_cv_fst_snd - \\ gvs[] -QED - -val pre = cv_auto_trans_pre_rec "" v2pretty_sing_def - (WF_REL_TAC ‘measure $ λx. case x of INL v => cv_size v | INR v => cv_size v’ - \\ cv_termination_tac \\ Cases_on ‘k’ \\ gvs [] - \\ imp_res_tac cv_str_tree_dest_list_size - \\ assume_tac cv_size_cv_fst_snd \\ gvs []); - -Theorem v2pretty_sing_pre[cv_pre]: - (∀v. v2pretty_sing_pre v) ∧ - (∀v. v2pretty_list_pre v) -Proof - ho_match_mp_tac v2pretty_sing_ind - \\ rw [] \\ simp [Once pre] \\ gvs [] -QED - -val _ = cv_trans (v2pretty_eq_v2pretty_sing |> CONJUNCT1); - -val _ = cv_auto_trans str_treeTheory.v2strs_def; val _ = cv_trans_pre "" jsonLangTheory.num_to_hex_digit_def; @@ -2949,4 +2858,3 @@ val _ = cv_auto_trans presLangTheory.word_exp_to_display_def; val _ = cv_auto_trans presLangTheory.word_prog_to_display_def; val _ = cv_auto_trans presLangTheory.stack_prog_to_display_def; - diff --git a/developers/bin/README.md b/developers/bin/README.md index b399ff7b25..76a3bf486a 100644 --- a/developers/bin/README.md +++ b/developers/bin/README.md @@ -1,13 +1,2 @@ This directory represents a stage in the build sequence where the latest available cake binary is downloaded to perform testing and bootstrapping. - -[basis_ffi.c](basis_ffi.c): -Implements the foreign function interface (FFI) used in the CakeML basis -library, as a thin wrapper around the relevant system calls. - -[hello.cml](hello.cml): -A simple hello world program in CakeML - -[repl_boot.cml](repl_boot.cml): -This file gives the CakeML REPL multi-line input and file loading -capabilities. diff --git a/pancake/semantics/README.md b/pancake/semantics/README.md index 5e5c9b9382..88bf003750 100644 --- a/pancake/semantics/README.md +++ b/pancake/semantics/README.md @@ -12,12 +12,6 @@ Properties of loopLang and loopSem [loopSemScript.sml](loopSemScript.sml): The formal semantics of loopLang -[panItreePropsScript.sml](panItreePropsScript.sml): -Props for Pancake ITree semantics and correspondence proof. - -[panItreeSemScript.sml](panItreeSemScript.sml): -An itree semantics for Pancake. - [panPropsScript.sml](panPropsScript.sml): panLang Properties @@ -26,3 +20,9 @@ Semantics of panLang [pan_commonPropsScript.sml](pan_commonPropsScript.sml): Common Properties for Pancake ILS + +[pan_itreePropsScript.sml](pan_itreePropsScript.sml): +Misc Lemmas for Pancake ITree semantics and correspondence proof. + +[pan_itreeSemScript.sml](pan_itreeSemScript.sml): +An itree semantics for Pancake. From 9e6197873ae2ce263dcdcb9c4c58c34dd3e0d828 Mon Sep 17 00:00:00 2001 From: Magnus Myreen Date: Tue, 16 Dec 2025 17:26:05 +0100 Subject: [PATCH 36/42] Start using Sexp module for Candle print_thm --- basis/pure/mlsexpScript.sml | 38 ++ candle/standard/ml_kernel/Holmakefile | 1 - candle/standard/ml_kernel/README.md | 3 - candle/standard/ml_kernel/lisp/Holmakefile | 9 - candle/standard/ml_kernel/lisp/README.md | 13 - .../ml_kernel/lisp/lisp_parsingScript.sml | 85 --- .../lisp/lisp_parsing_proofsScript.sml | 561 ------------------ .../ml_kernel/lisp/lisp_printingScript.sml | 172 ------ .../ml_kernel/lisp/lisp_valuesScript.sml | 135 ----- candle/standard/ml_kernel/lisp/readmePrefix | 1 - candle/standard/ml_kernel/print_thmScript.sml | 232 ++++---- 11 files changed, 146 insertions(+), 1104 deletions(-) delete mode 100644 candle/standard/ml_kernel/lisp/Holmakefile delete mode 100644 candle/standard/ml_kernel/lisp/README.md delete mode 100644 candle/standard/ml_kernel/lisp/lisp_parsingScript.sml delete mode 100644 candle/standard/ml_kernel/lisp/lisp_parsing_proofsScript.sml delete mode 100644 candle/standard/ml_kernel/lisp/lisp_printingScript.sml delete mode 100644 candle/standard/ml_kernel/lisp/lisp_valuesScript.sml delete mode 100644 candle/standard/ml_kernel/lisp/readmePrefix diff --git a/basis/pure/mlsexpScript.sml b/basis/pure/mlsexpScript.sml index 242accc9b4..badc0fb606 100644 --- a/basis/pure/mlsexpScript.sml +++ b/basis/pure/mlsexpScript.sml @@ -136,6 +136,16 @@ Termination \\ imp_res_tac read_symbol_length \\ fs[] End +Theorem lex_aux_length: + ∀depth input acc e rest. + lex_aux depth input acc = INR (e,rest) ⇒ LENGTH rest ≤ LENGTH input +Proof + ho_match_mp_tac lex_aux_ind \\ rw [] + \\ gvs [lex_aux_def,AllCaseEqs()] + \\ imp_res_tac read_string_length \\ gvs [] + \\ imp_res_tac read_symbol_length \\ gvs [] +QED + (* Tokenizes (at most) one S-expression, and returns the rest of the input. Fails with an error message if parentheses are unbalanced or read_string fails on a string literal. *) @@ -144,6 +154,19 @@ Definition lex_def: lex_aux 0 input [] End +Theorem lex_IMP_LENGTH_LESS: + ∀input x rest. lex input = INR (x,rest) ⇒ LENGTH rest < LENGTH input ∨ x = [] +Proof + simp [AllCaseEqs(),PULL_EXISTS,lex_def] + \\ rpt gen_tac + \\ Cases_on ‘input’ + \\ fs [lex_aux_def,AllCaseEqs()] + \\ CCONTR_TAC \\ gvs [] + \\ imp_res_tac lex_aux_length \\ gvs [] + \\ imp_res_tac read_string_length \\ gvs [] + \\ imp_res_tac read_symbol_length \\ gvs [] +QED + Theorem test_lex[local]: lex " 1 2 3 " = INR ([SYMBOL «1»]," 2 3 ") ∧ lex "\"\n \" 2" = INR ([SYMBOL «\n »]," 2") ∧ @@ -177,6 +200,21 @@ Definition parse_def: | (v::_) => INR (v, rest) End +Theorem parse_IMP_LENGTH_LESS: + ∀input x rest. parse input = INR (x,rest) ⇒ LENGTH rest < LENGTH input +Proof + rw [parse_def,AllCaseEqs(),PULL_EXISTS] + \\ imp_res_tac lex_IMP_LENGTH_LESS + \\ gvs [parse_aux_def] +QED + +Theorem parse_space: + parse (#"\n" :: xs) = parse xs +Proof + simp [parse_def,lex_def] + \\ simp [Once lex_aux_def,EVAL “isSpace #"\n"”] +QED + Theorem test_parse[local]: parse " 1 2 3 " = INR (Atom «1»," 2 3 ") ∧ parse "(1 2 3)" = INR (Expr [Atom «1»; Atom «2»; Atom «3»],"") ∧ diff --git a/candle/standard/ml_kernel/Holmakefile b/candle/standard/ml_kernel/Holmakefile index 6688e08d4c..98cc4411a8 100644 --- a/candle/standard/ml_kernel/Holmakefile +++ b/candle/standard/ml_kernel/Holmakefile @@ -1,5 +1,4 @@ INCLUDES = $(CAKEMLDIR)/misc\ - lisp\ $(CAKEMLDIR)/semantics\ $(CAKEMLDIR)/translator\ $(CAKEMLDIR)/translator/monadic\ diff --git a/candle/standard/ml_kernel/README.md b/candle/standard/ml_kernel/README.md index 64b255d638..2a22b355ea 100644 --- a/candle/standard/ml_kernel/README.md +++ b/candle/standard/ml_kernel/README.md @@ -1,9 +1,6 @@ Implementation of the monadic functions in (deeply embedded) CakeML, generated by the translator (proof-producing synthesis). -[lisp](lisp): -Parsing and pretty printing of simple s-expressions - [ml_hol_initScript.sml](ml_hol_initScript.sml): Prove that the state of the kernel can be initialised in a way that meets the invariants (STATE and HOL_STORE). diff --git a/candle/standard/ml_kernel/lisp/Holmakefile b/candle/standard/ml_kernel/lisp/Holmakefile deleted file mode 100644 index 6f957a7585..0000000000 --- a/candle/standard/ml_kernel/lisp/Holmakefile +++ /dev/null @@ -1,9 +0,0 @@ -INCLUDES = $(CAKEMLDIR)/misc - -all: $(DEFAULT_TARGETS) README.md -.PHONY: all - -README_SOURCES = $(wildcard *Script.sml) $(wildcard *Lib.sml) $(wildcard *Syntax.sml) -DIRS = $(wildcard */) -README.md: $(CAKEMLDIR)/developers/readme_gen readmePrefix $(patsubst %,%readmePrefix,$(DIRS)) $(README_SOURCES) - $(protect $(CAKEMLDIR)/developers/readme_gen) $(README_SOURCES) diff --git a/candle/standard/ml_kernel/lisp/README.md b/candle/standard/ml_kernel/lisp/README.md deleted file mode 100644 index eff592140c..0000000000 --- a/candle/standard/ml_kernel/lisp/README.md +++ /dev/null @@ -1,13 +0,0 @@ -Parsing and pretty printing of simple s-expressions - -[lisp_parsingScript.sml](lisp_parsingScript.sml): -Parsing of Lisp s-expressions - -[lisp_parsing_proofsScript.sml](lisp_parsing_proofsScript.sml): -Proof that pretty printong then parsing returns the same - -[lisp_printingScript.sml](lisp_printingScript.sml): -Pretty printing for Lisp s-expressions - -[lisp_valuesScript.sml](lisp_valuesScript.sml): -Definition of simple Lisp s-expressions diff --git a/candle/standard/ml_kernel/lisp/lisp_parsingScript.sml b/candle/standard/ml_kernel/lisp/lisp_parsingScript.sml deleted file mode 100644 index d16d8215ac..0000000000 --- a/candle/standard/ml_kernel/lisp/lisp_parsingScript.sml +++ /dev/null @@ -1,85 +0,0 @@ -(* - Parsing of Lisp s-expressions -*) -Theory lisp_parsing -Ancestors - arithmetic list pair finite_map string lisp_values -Libs - term_tactic - -(* lexing *) - -Datatype: - token = OPEN | CLOSE | DOT | NUM num | QUOTE num -End - -Definition read_num_def: - read_num l h f x acc [] = (acc,[]) ∧ - read_num l h f x acc (c::cs) = - if ORD l ≤ ORD c ∧ ORD c ≤ ORD h then - read_num l h f x (f * acc + (ORD c - x)) cs - else (acc,c::cs) -End - -Theorem read_num_length: - ∀l h xs n ys f acc x. - read_num l h f x acc xs = (n,ys) ⇒ - LENGTH ys ≤ LENGTH xs ∧ (xs ≠ ys ⇒ LENGTH ys < LENGTH xs) -Proof - Induct_on ‘xs’ \\ rw [read_num_def] - \\ TRY pairarg_tac \\ fs [] \\ rw [] \\ res_tac \\ fs [] -QED - -Definition end_line_def: - end_line [] = [] ∧ - end_line (c::cs) = if c = #"\n" then cs else end_line cs -End - -Theorem end_line_length: - ∀cs. STRLEN (end_line cs) < SUC (STRLEN cs) -Proof - Induct \\ rw [end_line_def] -QED - -Definition lex_def: - lex q [] acc = acc ∧ - lex q (c::cs) acc = - if MEM c " \t\n" then lex NUM cs acc else - if c = #"#" then lex NUM (end_line cs) acc else - if c = #"." then lex NUM cs (DOT::acc) else - if c = #"(" then lex NUM cs (OPEN::acc) else - if c = #")" then lex NUM cs (CLOSE::acc) else - if c = #"'" then lex QUOTE cs acc else - let (n,rest) = read_num #"0" #"9" 10 (ORD #"0") 0 (c::cs) in - if rest ≠ c::cs then lex NUM rest (q n::acc) else - let (n,rest) = read_num #"*" #"z" 256 0 0 (c::cs) in - if rest ≠ c::cs then lex NUM rest (q n::acc) else - lex NUM cs acc -Termination - WF_REL_TAC ‘measure (LENGTH o FST o SND)’ \\ rw [] - \\ imp_res_tac (GSYM read_num_length) \\ fs [end_line_length] -End - -Definition lexer_def: - lexer input = lex NUM input [] -End - - -(* parsing *) - -Definition quote_def: - quote n = list [Num (name "'"); Num n] -End - -Definition parse_def: - parse [] x s = x ∧ - parse (CLOSE :: rest) x s = parse rest (Num 0) (x::s) ∧ - parse (OPEN :: rest) x s = - (case s of [] => parse rest x s - | (y::ys) => parse rest (Pair x y) ys) ∧ - parse (NUM n :: rest) x s = parse rest (Pair (Num n) x) s ∧ - parse (QUOTE n :: rest) x s = parse rest (Pair (quote n) x) s ∧ - parse (DOT :: rest) x s = parse rest (head x) s -End - - diff --git a/candle/standard/ml_kernel/lisp/lisp_parsing_proofsScript.sml b/candle/standard/ml_kernel/lisp/lisp_parsing_proofsScript.sml deleted file mode 100644 index ef33466f36..0000000000 --- a/candle/standard/ml_kernel/lisp/lisp_parsing_proofsScript.sml +++ /dev/null @@ -1,561 +0,0 @@ -(* - Proof that pretty printong then parsing returns the same -*) -Theory lisp_parsing_proofs -Ancestors - arithmetic list pair finite_map string lisp_values - lisp_printing lisp_parsing - -Definition dest_quote_def: - dest_quote v = - case v of - | (Pair x (Pair (Num n) (Num 0))) => - (if x = Num (name "'") then SOME (QUOTE n) else NONE) - | _ => NONE -End - -Definition v2toks_def: - v2toks v = - case v of Num n => [NUM n] | _ => - case dest_quote v of SOME s => [s] | NONE => - let (l,e) = dest_list v in - [OPEN] ++ - (if e = Num 0 then FLAT (MAP v2toks l) else - FLAT (MAP v2toks l) ++ [DOT] ++ (v2toks e)) ++ - [CLOSE] -Termination - WF_REL_TAC ‘measure lisp_v_size’ \\ rw [] - \\ imp_res_tac dest_list_size \\ fs [lisp_v_size_def,isNum_def] -End - -Definition pretty2toks_def: - pretty2toks (Str s) = lexer s ∧ - pretty2toks (Append p _ q) = pretty2toks q ++ pretty2toks p ∧ - pretty2toks (Parenthesis p) = [CLOSE] ++ pretty2toks p ++ [OPEN] ∧ - pretty2toks (Size _ p) = pretty2toks p -End - -Theorem flatten_acc: - ∀indent p acc. flatten indent p acc = flatten indent p [] ++ acc -Proof - Induct_on ‘p’ \\ simp_tac std_ss [flatten_def] - \\ rpt (pop_assum (once_rewrite_tac o single)) \\ fs [] -QED - -Theorem lex_acc: - ∀p xs acc. lex p xs acc = lex p xs [] ++ acc -Proof - qsuff_tac ‘∀p xs (a:token list) acc. lex p xs [] ++ acc = lex p xs acc’ - THEN1 metis_tac [] - \\ ho_match_mp_tac lex_ind \\ rw [] - \\ once_rewrite_tac [lex_def] - \\ simp_tac (srw_ss()) [] - \\ rpt IF_CASES_TAC - \\ rw [] \\ fs [] \\ rw [] - \\ rpt (pairarg_tac \\ fs []) \\ rw [] - \\ rpt (pop_assum mp_tac) - \\ metis_tac [APPEND,APPEND_ASSOC] -QED - -Theorem read_num_suffix: - ∀a b c d xs y ys n1 n2 acc rest1 rest2. - read_num a b c d acc (xs ++ y::ys) = (n1,rest1) ∧ - read_num a b c d acc xs = (n2,rest2) ∧ - ~(ORD a ≤ ORD y ∧ ORD y ≤ ORD b) ⇒ - n1 = n2 ∧ rest1 = rest2 ++ y::ys -Proof - Induct_on ‘xs’ \\ fs [read_num_def] THEN1 rw [] - \\ rpt gen_tac \\ rpt (pairarg_tac \\ fs []) - \\ IF_CASES_TAC \\ fs [] \\ rw [] \\ fs[] - \\ first_x_assum drule \\ fs [] -QED - -Theorem read_num_EVERY_IMP: - ∀s x1 x2 y1 y2 y3 n rest. - read_num x1 x2 y1 y2 y3 s = (n,rest) ∧ EVERY p s ⇒ EVERY p rest -Proof - Induct \\ fs [read_num_def] \\ rw [] \\ res_tac \\ fs [] -QED - -Theorem lex_APPEND_split: - ∀c. - ORD c < ORD #"*" ⇒ - ∀xs ys acc p. - EVERY (\x. x ≠ CHR 35) xs ⇒ - lex p (STRCAT xs (STRING c ys)) acc = lex NUM (c::ys) (lex p xs acc) -Proof - gen_tac \\ strip_tac \\ gen_tac \\ completeInduct_on ‘LENGTH xs’ - \\ rw [] \\ fs [PULL_FORALL] - \\ Cases_on ‘xs’ - THEN1 (fs [lex_def] \\ rw [] \\ fs [read_num_def]) - \\ fs [] - \\ CONV_TAC (RATOR_CONV (SIMP_CONV std_ss [lex_def])) - \\ CONV_TAC (PATH_CONV "rr" (SIMP_CONV std_ss [lex_def])) - \\ rw [] \\ fs [] - \\ pairarg_tac \\ fs [] - \\ once_rewrite_tac [EQ_SYM_EQ] - \\ pairarg_tac \\ fs [] - \\ qabbrev_tac ‘ss = h::t’ - \\ ‘STRING h (STRCAT (STRCAT t [c]) ys) = ss ++ c::ys’ by fs [] - \\ qpat_x_assum ‘_ = (n,rest)’ mp_tac - \\ asm_rewrite_tac [] \\ strip_tac - \\ drule read_num_suffix \\ fs [] - \\ strip_tac \\ IF_CASES_TAC \\ fs [] - THEN1 - (‘LENGTH rest' < LENGTH (h::t)’ by - (imp_res_tac read_num_length \\ rfs [] \\ rw [] \\ fs []) - \\ fs [AND_IMP_INTRO] - \\ first_x_assum drule - \\ imp_res_tac read_num_EVERY_IMP \\ rw []) - \\ pairarg_tac \\ fs [] - \\ pairarg_tac \\ fs [] \\ pop_assum mp_tac - \\ rewrite_tac [GSYM APPEND_ASSOC,APPEND] \\ strip_tac - \\ drule read_num_suffix \\ fs [] - \\ strip_tac \\ fs [] - \\ IF_CASES_TAC \\ fs [] - \\ ‘LENGTH rest'' < LENGTH (h::t)’ by - (imp_res_tac read_num_length \\ rfs [] \\ rw [] \\ fs []) - \\ fs [] \\ first_x_assum drule - \\ imp_res_tac read_num_EVERY_IMP \\ rw [] -QED - -Theorem lex_indent: - ∀t. EVERY (λc. c = #" " ∨ c = #"\n") t ⇒ - lex NUM (t ++ s) acc = lex NUM s acc -Proof - Induct \\ fs [] \\ rw [] \\ fs [] \\ fs [lex_def] -QED - -Definition no_hash_def[simp]: - no_hash (Str s) = EVERY (λx. x ≠ #"#") s ∧ - no_hash (Append p1 b p2) = (no_hash p1 ∧ no_hash p2) ∧ - no_hash (Parenthesis p) = no_hash p ∧ - no_hash (Size _ p) = no_hash p -End - -Theorem no_hash_flatten: - ∀x s1 s2. - EVERY (λx. x ≠ #"#") s1 ∧ EVERY (λx. x ≠ #"#") s2 ⇒ - EVERY (λx. x ≠ #"#") (flatten s1 x s2) = no_hash x -Proof - Induct_on ‘x’ \\ fs [flatten_def] \\ rw [] - \\ once_rewrite_tac [flatten_acc] \\ fs [] -QED - -Theorem lex_flatten: - ∀p indent. - EVERY (λc. MEM c " \n") indent ∧ indent ≠ [] ∧ no_hash p ⇒ - lex NUM (flatten indent p []) [] = pretty2toks p -Proof - Induct - THEN1 - (rw [flatten_def,pretty2toks_def,lex_def] - \\ once_rewrite_tac [lex_acc] \\ fs [] - \\ qmatch_goalsub_abbrev_tac ‘flatten ss’ - \\ first_x_assum (qspec_then ‘ss’ mp_tac) - \\ fs [Abbr‘ss’] \\ rw [] - \\ once_rewrite_tac [flatten_acc] \\ fs [] - \\ ‘ORD #")" < ORD #"*"’ by EVAL_TAC - \\ drule lex_APPEND_split - \\ ‘EVERY (λx. x ≠ #"#") (flatten (STRCAT indent " ") p "")’ by - (‘EVERY (λx. x ≠ #"#") "" ∧ - EVERY (λx. x ≠ #"#") (STRCAT indent " ")’ by - (fs [] \\ fs [EVERY_MEM] \\ strip_tac \\ res_tac \\ fs []) - \\ imp_res_tac no_hash_flatten) - \\ disch_then drule \\ fs [] - \\ disch_then (qspecl_then [‘[]’,‘[]’] mp_tac) - \\ fs [] \\ rw [] \\ EVAL_TAC) - THEN1 fs [flatten_def,pretty2toks_def,lexer_def] - \\ fs [pretty2toks_def,flatten_def] - \\ rpt strip_tac - \\ qmatch_goalsub_abbrev_tac ‘white_space:string ++ _’ - \\ ‘EVERY (λc. c = #" " ∨ c = #"\n") white_space ∧ white_space ≠ []’ - by (fs [Abbr‘white_space’] \\ rw []) - \\ ‘∃hws tws. white_space = hws :: tws’ by - (Cases_on ‘white_space’ \\ fs []) - \\ pop_assum (fn th => once_rewrite_tac [th] \\ assume_tac (GSYM th)) - \\ ‘ORD hws < ORD #"*"’ by gvs[] - \\ drule lex_APPEND_split - \\ once_rewrite_tac [flatten_acc] - \\ ‘EVERY (λc. c = #" " ∨ c = #"\n") tws’ by gvs[] - \\ simp[] - \\ ‘EVERY (λx. x ≠ #"#") (flatten indent p "")’ - by (irule $ iffRL no_hash_flatten >> gs[EVERY_MEM] >> strip_tac >> - first_x_assum drule >> simp[]) - \\ asm_simp_tac std_ss [GSYM APPEND_ASSOC] - \\ once_rewrite_tac [lex_acc] - \\ gvs [lex_indent, lex_def] -QED - -Theorem pretty2toks_annotate: - ∀p. pretty2toks (annotate p) = pretty2toks p -Proof - Induct \\ fs [annotate_def,pretty2toks_def] -QED - -Theorem pretty2toks_smart_remove_all: - ∀p. pretty2toks (remove_all p) = pretty2toks p -Proof - Induct \\ fs [remove_all_def,pretty2toks_def] -QED - -Theorem pretty2toks_smart_remove: - ∀p n i. pretty2toks (smart_remove i n p) = pretty2toks p -Proof - Induct \\ fs [smart_remove_def,pretty2toks_def] - \\ rw [pretty2toks_smart_remove_all,pretty2toks_def] -QED - -Theorem read_num_append: - ∀xs acc ys. - read_num c1 c2 x y acc (xs ++ ys) = - let (k,t) = read_num c1 c2 x y acc xs in - if t = [] then read_num c1 c2 x y k ys else (k,t ++ ys) -Proof - Induct \\ fs [read_num_def] \\ rw [] -QED - -Theorem read_num_num2str: - ∀n acc. - read_num #"0" #"9" 10 48 acc (num2str n) = (acc * 10 ** LENGTH (num2str n) + n,[]) -Proof - ho_match_mp_tac num2str_ind - \\ gen_tac \\ strip_tac - \\ once_rewrite_tac [num2str_def] \\ fs [] - \\ IF_CASES_TAC \\ fs [] - THEN1 fs [read_num_def] - \\ fs [read_num_append] - \\ fs [read_num_def] - \\ ‘n MOD 10 < 10’ by fs [MOD_LESS] - \\ ‘n MOD 10 + 48 < 256’ by simp [] - \\ simp [ORD_CHR] \\ rw [] - \\ once_rewrite_tac [EQ_SYM_EQ] - \\ fs [GSYM ADD1,EXP] - \\ ‘0 < 10n’ by fs [] - \\ drule DIVISION - \\ disch_then (fn th => CONV_TAC (RATOR_CONV (ONCE_REWRITE_CONV [th]))) - \\ fs [] - \\ once_rewrite_tac [ADD_COMM] - \\ once_rewrite_tac [MULT_COMM] - \\ simp [ADD_DIV_ADD_DIV] -QED - -Theorem read_num_num2ascii: - ∀n x acc. - num2ascii n = SOME x ⇒ - read_num #"*" #"z" 256 0 acc x = (acc * 256 ** LENGTH x + n,[]) -Proof - ho_match_mp_tac num2ascii_ind - \\ gen_tac \\ strip_tac - \\ once_rewrite_tac [num2ascii_def] \\ fs [] - \\ IF_CASES_TAC \\ fs [AllCaseEqs(),PULL_EXISTS] - THEN1 fs [read_num_def] - \\ rw [] \\ fs [] - \\ fs [read_num_append] - \\ fs [read_num_def] - \\ fs [GSYM ADD1,EXP] - \\ once_rewrite_tac [EQ_SYM_EQ] - \\ ‘0 < 256n’ by fs [] - \\ drule DIVISION - \\ disch_then (fn th => CONV_TAC (RATOR_CONV (ONCE_REWRITE_CONV [th]))) - \\ fs [] - \\ once_rewrite_tac [ADD_COMM] - \\ once_rewrite_tac [MULT_COMM] - \\ simp [ADD_DIV_ADD_DIV] -QED - -Theorem num2str_not_nil: - num2str n ≠ [] -Proof - once_rewrite_tac [num2str_def] \\ rw [] \\ fs [] -QED - -Theorem num2str_cons_IMP: - ∀n h t. - num2str n = STRING h t ⇒ - ORD #"0" ≤ ORD h ∧ ORD h ≤ ORD #"9" -Proof - ho_match_mp_tac num2str_ind \\ rw [] - \\ pop_assum mp_tac - \\ once_rewrite_tac [num2str_def] \\ rw [] \\ fs [] - \\ Cases_on ‘num2str (n DIV 10)’ \\ gvs[num2str_not_nil] -QED - -Theorem num2ascii_SOME_IMP: - ∀n x. - num2ascii n = SOME x ⇒ - ∃c cs. x = c::cs ∧ (ORD #"*" ≤ ORD c ∧ ORD c ≤ ORD #"z" ∧ ORD c ≠ ORD #".") -Proof - ho_match_mp_tac num2ascii_ind \\ rw [] - \\ pop_assum mp_tac - \\ once_rewrite_tac [num2ascii_def] - \\ rw [] \\ fs [AllCaseEqs()] \\ rw [] - \\ res_tac \\ fs [] -QED - -Theorem lex_name2str: - lex p (name2str n) [] = [p n] -Proof - fs [name2str_def] - \\ BasicProvers.CASE_TAC - THEN1 - (pop_assum kall_tac - \\ Cases_on ‘num2str n’ - \\ fs [lex_def,num2str_not_nil] - \\ imp_res_tac num2str_cons_IMP - \\ rw [] \\ fs [lex_def] - \\ last_assum (assume_tac o GSYM) \\ fs [] - \\ rewrite_tac [read_num_num2str] \\ fs [num2str_not_nil,lex_def]) - \\ fs [ascii_name_def,AllCaseEqs()] - \\ imp_res_tac num2ascii_SOME_IMP - \\ fs [lex_def] - \\ rw [] \\ fs [] - \\ fs [read_num_def] - \\ drule read_num_num2ascii \\ fs [read_num_def] - \\ disch_then (qspec_then ‘0’ mp_tac) \\ fs [lex_def] -QED - -Definition any_list_def: - any_list [] e = e ∧ - any_list (x::xs) e = Pair x (any_list xs e) -End - -Theorem dest_list_IMP: - ∀v l e. dest_list v = (l,e) ⇒ v = any_list l e ∧ isNum e -Proof - Induct_on ‘l’ \\ fs [] - \\ Cases_on ‘v’ \\ fs [dest_list_def,any_list_def] - \\ rw [] \\ pairarg_tac \\ fs [] \\ rw [] \\ res_tac -QED - -Theorem any_list_eq_list[simp]: - ∀xs. any_list xs (Num 0) = list xs -Proof - Induct \\ fs [any_list_def,list_def] -QED - -Theorem pretty2toks_v2pretty: - ∀h. pretty2toks (v2pretty h) = REVERSE (v2toks h) -Proof - gen_tac \\ completeInduct_on ‘lisp_v_size h’ - \\ rw [] \\ fs [PULL_FORALL] - \\ once_rewrite_tac [v2pretty_def] - \\ reverse (Cases_on ‘h’) \\ simp [] - THEN1 simp [pretty2toks_def,Once v2toks_def,lex_name2str,lexer_def] - \\ simp [Once v2toks_def] - \\ ‘~isNum (Pair v v0)’ by fs [] - \\ rename [‘lisp_v_size vv’] - \\ reverse BasicProvers.CASE_TAC - THEN1 - (fs [dest_quote_def,lisp_printingTheory.dest_quote_def,AllCaseEqs()] - \\ rw [pretty2toks_def,lex_def,lexer_def,lex_name2str]) - \\ reverse BasicProvers.CASE_TAC - THEN1 (fs [dest_quote_def,lisp_printingTheory.dest_quote_def,AllCaseEqs()] \\ rw []) - \\ pairarg_tac \\ fs [pretty2toks_def] \\ rw [] - \\ imp_res_tac dest_list_IMP \\ fs [] - \\ rpt (qpat_x_assum ‘_ = NONE’ kall_tac) - THEN1 - (rw [] \\ Induct_on ‘l’ \\ fs [EVAL “pretty2toks (newlines [])”] - \\ fs [list_def,dest_list_def] \\ rw [] - \\ pairarg_tac \\ fs [] \\ rw [] - \\ Cases_on ‘l = []’ \\ rw [] \\ fs [newlines_def] - \\ Cases_on ‘l’ \\ fs [newlines_def,list_def] - \\ fs [pretty2toks_def]) - \\ rw [] \\ Induct_on ‘l’ \\ fs [] - THEN1 - (fs [any_list_def,pretty2toks_def] - \\ Cases_on ‘e’ \\ fs [] \\ rw [] - \\ simp [dest_list_def,Once v2pretty_def,pretty2toks_def,v2toks_def,lexer_def, - EVAL “lexer " . "”,EVAL “pretty2toks (newlines [])”,lex_name2str]) - \\ fs [any_list_def,dest_list_def] \\ rw [] - \\ pairarg_tac \\ fs [] \\ rw [] - \\ Cases_on ‘l = []’ \\ rw [] \\ fs [newlines_def] - THEN1 (fs [pretty2toks_def,any_list_def] \\ EVAL_TAC) - \\ Cases_on ‘l’ \\ fs [newlines_def,any_list_def] - \\ fs [pretty2toks_def] -QED - -Theorem dropWhile_cons_imp: - ∀s c h t. - dropWhile (λx. x ≠ c) s = h::t ⇒ - ∃prefix. s = prefix ++ c :: t ∧ h = c ∧ EVERY (λx. x ≠ c) prefix -Proof - Induct \\ fs [] \\ rw [] \\ res_tac \\ fs [] -QED - -Theorem end_line_prefix: - ∀prefix xs. - EVERY (λx. x ≠ #"\n") prefix ⇒ - end_line (STRCAT prefix (STRING #"\n" xs)) = xs -Proof - Induct \\ fs [end_line_def] -QED - -Theorem lex_is_comment: - ∀c rest. is_comment c ⇒ lex NUM (c ++ rest) [] = lex NUM rest [] -Proof - ho_match_mp_tac is_comment_ind \\ rw [] - \\ fs [is_comment_def] - \\ reverse (Cases_on ‘c = #"#"’) \\ fs [] - \\ simp [Once lex_def] - \\ rename [‘dropWhile (λx. x ≠ #"\n") s’] - \\ Cases_on ‘dropWhile (λx. x ≠ #"\n") s’ \\ fs [] - \\ rw [] \\ imp_res_tac dropWhile_cons_imp \\ rw [] - \\ rewrite_tac [GSYM APPEND_ASSOC,APPEND] - \\ drule end_line_prefix \\ strip_tac \\ asm_rewrite_tac [] -QED - -Theorem num2ascii_no_hash: - ∀n x. num2ascii n = SOME x ⇒ EVERY (λx. x ≠ #"#") x -Proof - ho_match_mp_tac num2ascii_ind \\ rw [] \\ fs [] - \\ pop_assum mp_tac \\ rw [Once num2ascii_def] \\ fs [AllCaseEqs()] - \\ res_tac \\ fs [] \\ rw [] -QED - -Theorem no_hash_name2str: - EVERY (λx. x ≠ #"#") (name2str n) -Proof - fs [name2str_def] - \\ Cases_on ‘ascii_name n’ \\ fs [] - THEN1 - (pop_assum kall_tac \\ qid_spec_tac ‘n’ - \\ ho_match_mp_tac num2str_ind - \\ rw [] \\ simp [Once num2str_def] \\ rw [] - \\ ‘n MOD 10 < 10’ by fs [] - \\ ‘(n MOD 10) + 48 < 256 ∧ 35 < 256’ by decide_tac - \\ imp_res_tac CHR_11 \\ fs []) - \\ fs [ascii_name_def,AllCaseEqs()] - \\ imp_res_tac num2ascii_no_hash \\ fs [] -QED - -Theorem no_hash_smart_remove_annotate_v2pretty[simp]: - no_hash (smart_remove m n (annotate (v2pretty h))) -Proof - ‘∀p. no_hash (annotate p) = no_hash p’ by - (Induct_on ‘p’ \\ rw [annotate_def,no_hash_def]) - \\ ‘∀p. no_hash (remove_all p) = no_hash p’ by - (Induct_on ‘p’ \\ rw [remove_all_def,no_hash_def]) - \\ ‘∀m n p. no_hash (smart_remove m n p) = no_hash p’ by - (Induct_on ‘p’ \\ rw [smart_remove_def,no_hash_def]) - \\ fs [] \\ qid_spec_tac ‘h’ - \\ ho_match_mp_tac v2pretty_ind - \\ rw [] \\ once_rewrite_tac [v2pretty_def] - \\ rpt (BasicProvers.TOP_CASE_TAC \\ fs []) - \\ rpt (pairarg_tac \\ fs []) \\ rw [no_hash_def] - THEN1 - (pop_assum mp_tac \\ rpt (pop_assum kall_tac) - \\ Induct_on ‘l'’ \\ fs [newlines_def,no_hash_def] - \\ rw [] \\ last_x_assum mp_tac - \\ impl_tac THEN1 fs [] - \\ Cases_on ‘l'’ \\ fs [newlines_def]) - \\ fs [] - THEN1 - (qpat_x_assum ‘∀h._’ mp_tac \\ rpt (pop_assum kall_tac) - \\ Induct_on ‘l'’ \\ fs [newlines_def,no_hash_def] - \\ rw [] \\ last_x_assum mp_tac - \\ impl_tac THEN1 fs [] - \\ Cases_on ‘l'’ \\ fs [newlines_def]) - \\ fs [lisp_printingTheory.dest_quote_def,AllCaseEqs()] - \\ rw [no_hash_name2str] -QED - -Theorem lexer_vs2str: - ∀vs coms. lexer (vs2str vs coms) = REVERSE (FLAT (MAP v2toks vs)) -Proof - Induct THEN1 (EVAL_TAC \\ fs []) - \\ fs [vs2str_def,lexer_def] \\ gen_tac - \\ ‘EVERY (λx. x ≠ #"#") (v2str h)’ by fs [v2str_def,no_hash_flatten] - \\ drule (lex_APPEND_split |> Q.SPEC ‘#"\n"’ - |> CONV_RULE (PATH_CONV "lr" EVAL) |> REWRITE_RULE []) - \\ rewrite_tac [GSYM APPEND_ASSOC,APPEND] \\ fs [] \\ rw [] - \\ Cases_on ‘coms’ \\ fs [] - \\ TRY IF_CASES_TAC \\ fs [] - \\ TRY (drule lex_is_comment) - \\ rpt strip_tac - \\ asm_rewrite_tac [GSYM APPEND_ASSOC] - \\ simp [Once lex_def] - \\ simp [Once lex_def] - \\ once_rewrite_tac [lex_acc] - \\ fs [] \\ simp [v2str_def] - \\ fs [lex_flatten,pretty2toks_annotate,pretty2toks_smart_remove] - \\ fs [pretty2toks_v2pretty] -QED - -Theorem lisp_v_size_any_list: - ∀l e. - lisp_v_size (any_list l e) = SUM (MAP lisp_v_size l) + LENGTH l + lisp_v_size e -Proof - Induct \\ fs [any_list_def] -QED - -Theorem parse_v2toks: - ∀vs ys xs ts e. - isNum e ⇒ - parse (REVERSE (FLAT (MAP v2toks vs)) ++ ts) (any_list ys e) xs = - parse ts (any_list (vs ++ ys) e) xs -Proof - gen_tac \\ completeInduct_on ‘SUM (MAP lisp_v_size vs) + LENGTH vs’ - \\ rpt strip_tac \\ rw [] \\ fs [PULL_FORALL] - \\ Cases_on ‘vs = []’ THEN1 rw [] - \\ ‘∃x l. vs = SNOC x l’ by metis_tac [SNOC_CASES] - \\ fs [REVERSE_APPEND,SNOC_APPEND] - \\ Cases_on ‘l ≠ []’ THEN1 - (first_assum - (qspecl_then [‘[x]’,‘ys’,‘xs’,‘REVERSE (FLAT (MAP v2toks l)) ⧺ ts’,‘e’] mp_tac) - \\ impl_tac THEN1 (Cases_on ‘l’ \\ fs [SUM_APPEND]) - \\ fs [] \\ strip_tac \\ fs [] \\ pop_assum kall_tac - \\ first_assum (qspecl_then [‘l’,‘x::ys’,‘xs’,‘ts’,‘e’] mp_tac) - \\ impl_tac THEN1 fs [SUM_APPEND] - \\ fs [] \\ rewrite_tac [GSYM APPEND_ASSOC,APPEND]) - \\ once_rewrite_tac [v2toks_def] - \\ reverse (Cases_on ‘x’) \\ simp [] - THEN1 (fs [parse_def,any_list_def]) - \\ rename [‘dest_quote (Pair v1 v2)’] - \\ ‘~isNum (Pair v1 v2)’ by fs [] - \\ rename [‘~isNum v’] - \\ reverse (Cases_on ‘dest_quote v’) \\ fs [] - THEN1 - (rw [] \\ fs [dest_quote_def,AllCaseEqs()] \\ rw[] \\ fs [] - \\ fs [parse_def,any_list_def,quote_def,list_def]) - \\ pairarg_tac \\ fs [] - \\ Cases_on ‘e' = Num 0’ \\ fs [] - THEN1 - (fs [REVERSE_APPEND,parse_def] - \\ rewrite_tac [GSYM APPEND_ASSOC,APPEND] - \\ CONV_TAC (DEPTH_CONV ETA_CONV) \\ rw [] - \\ first_assum (qspecl_then [‘l''’,‘[]’,‘any_list ys e::xs’,‘OPEN::ts’,‘Num 0’] mp_tac) - \\ impl_tac THEN1 - (rw [] \\ imp_res_tac dest_list_IMP - \\ full_simp_tac std_ss [lisp_v_size_any_list] \\ fs []) - \\ strip_tac \\ fs [list_def] - \\ fs [parse_def,any_list_def] - \\ imp_res_tac dest_list_IMP \\ fs []) - \\ fs [REVERSE_APPEND,parse_def] - \\ rewrite_tac [GSYM APPEND_ASSOC,APPEND] - \\ CONV_TAC (DEPTH_CONV ETA_CONV) - \\ qpat_abbrev_tac ‘ts1 = DOT::_’ - \\ first_assum (qspecl_then [‘[e']’,‘[]’,‘any_list ys e::xs’,‘ts1’,‘Num 0’] mp_tac) - \\ impl_tac THEN1 - (rw [] \\ imp_res_tac dest_list_IMP \\ rw [] \\ fs [lisp_v_size_any_list] - \\ Cases_on ‘l''’ \\ fs [any_list_def] - \\ Cases_on ‘e'’ \\ fs [dest_list_def] - \\ pairarg_tac \\ fs []) - \\ fs [list_def] \\ strip_tac \\ fs [] - \\ qunabbrev_tac ‘ts1’ \\ fs [parse_def] - \\ rewrite_tac [GSYM APPEND_ASSOC,APPEND] \\ rw [] - \\ first_assum (qspecl_then [‘l''’,‘[]’,‘any_list ys e::xs’,‘OPEN::ts’,‘e'’] mp_tac) - \\ impl_tac - THEN1 (rw [] \\ imp_res_tac dest_list_IMP \\ rw [] \\ fs [lisp_v_size_any_list]) - \\ fs [any_list_def,parse_def] - \\ imp_res_tac dest_list_IMP \\ fs [] -QED - -Theorem parse_lexer_vs2str: - ∀vs coms ys xs. (parse (lexer (vs2str vs coms)) (Num 0) []) = list vs -Proof - fs [lexer_vs2str] \\ rw [] - \\ assume_tac (parse_v2toks |> Q.SPECL [‘vs’,‘[]’,‘[]’,‘[]’,‘Num 0’]) - \\ fs [parse_def,any_list_def,list_def] -QED - diff --git a/candle/standard/ml_kernel/lisp/lisp_printingScript.sml b/candle/standard/ml_kernel/lisp/lisp_printingScript.sml deleted file mode 100644 index 12787f74ac..0000000000 --- a/candle/standard/ml_kernel/lisp/lisp_printingScript.sml +++ /dev/null @@ -1,172 +0,0 @@ -(* - Pretty printing for Lisp s-expressions -*) -Theory lisp_printing -Ancestors - arithmetic list pair finite_map string lisp_values -Libs - mp_then - -(* pretty printing v *) - -Definition num2str_def: - num2str n = if n < 10 then [CHR (48 + n)] else - num2str (n DIV 10) ++ [CHR (48 + (n MOD 10))] -End - -Definition num2ascii_def: - num2ascii n = - if n = 0 then NONE else - let k = n MOD 256 in - if (ORD #"*" ≤ k ∧ k ≤ ORD #"z" ∧ k ≠ ORD #".") then - if n < 256 then SOME [CHR k] else - case num2ascii (n DIV 256) of - | NONE => NONE - | SOME s => SOME (s ++ [CHR k]) - else NONE -End - -Definition ascii_name_def: - ascii_name n = - case num2ascii n of - | NONE => NONE - | SOME s => let k = ORD (HD s) in - if ORD #"0" ≤ k ∧ k ≤ ORD #"9" then NONE else SOME s -End - -Definition name2str_def: - name2str n = - case ascii_name n of NONE => num2str n | SOME s => s -End - -Definition dest_quote_def: - dest_quote v = - case v of - | (Pair x (Pair (Num n) (Num 0))) => - (if x = Num (name "'") then SOME ("'" ++ name2str n) else NONE) - | _ => NONE -End - -Definition dest_list_def: - dest_list (Num n) = ([],Num n) ∧ - dest_list (Pair x y) = let (l,e) = dest_list y in (x::l,e) -End - -Theorem dest_list_size: - ∀v e l. - (l,e) = dest_list v ⇒ - lisp_v_size e ≤ lisp_v_size v ∧ - (~isNum v ⇒ lisp_v_size e < lisp_v_size v) ∧ - ∀x. MEM x l ⇒ lisp_v_size x < lisp_v_size v -Proof - Induct_on ‘v’ \\ fs [dest_list_def,isNum_def] - \\ pairarg_tac \\ fs [] \\ rw [] \\ res_tac \\ fs [lisp_v_size_def] -QED - -Datatype: - pretty = Parenthesis pretty - | Str string | Append pretty bool pretty | Size num pretty -End - -Definition newlines_def: - newlines [] = Str "" ∧ - newlines [x] = x ∧ - newlines (x::xs) = Append x T (newlines xs) -End - -Definition v2pretty_def: - v2pretty v = - case v of Num n => Str (name2str n) | _ => - case dest_quote v of SOME s => Str s | NONE => - let (l,e) = dest_list v in - Parenthesis - (if e = Num 0 then newlines (MAP v2pretty l) else - Append (newlines (MAP v2pretty l)) T - (Append (Str " . ") T (v2pretty e))) -Termination - WF_REL_TAC ‘measure lisp_v_size’ \\ rw [] - \\ imp_res_tac dest_list_size \\ fs [lisp_v_size_def,isNum_def] -End - -Definition get_size_def: - get_size (Size n x) = n ∧ - get_size (Append x _ y) = get_size x + get_size y + 1 ∧ - get_size (Parenthesis x) = get_size x + 2 ∧ - get_size _ = 0 -End - -Definition get_next_size_def: - get_next_size (Size n x) = n ∧ - get_next_size (Append x _ y) = get_next_size x ∧ - get_next_size (Parenthesis x) = get_next_size x + 2 ∧ - get_next_size _ = 0 -End - -Definition annotate_def: - annotate (Str s) = Size (LENGTH s) (Str s) ∧ - annotate (Parenthesis b) = - (let b = annotate b in - Size (get_size b + 2) (Parenthesis b)) ∧ - annotate (Append b1 n b2) = - (let b1 = annotate b1 in - let b2 = annotate b2 in - (* Size (get_size b1 + get_size b2 + 1) *) (Append b1 n b2)) ∧ - annotate (Size n b) = annotate b -End - -Definition remove_all_def: - remove_all (Parenthesis v) = Parenthesis (remove_all v) ∧ - remove_all (Str v1) = Str v1 ∧ - remove_all (Append v2 _ v3) = Append (remove_all v2) F (remove_all v3) ∧ - remove_all (Size v4 v5) = remove_all v5 -End - -Definition smart_remove_def: - smart_remove i k (Size n b) = - (if k + n < 70 then remove_all b else smart_remove i k b) ∧ - smart_remove i k (Parenthesis v) = Parenthesis (smart_remove (i+1) (k+1) v) ∧ - smart_remove i k (Str v1) = Str v1 ∧ - smart_remove i k (Append v2 b v3) = - let n2 = get_size v2 in - let n3 = get_next_size v3 in - if k + n2 + n3 < 50 then - Append (smart_remove i k v2) F (smart_remove i (k+n2) v3) - else - Append (smart_remove i k v2) T (smart_remove i i v3) -End - -Definition flatten_def: - flatten indent (Size n p) s = flatten indent p s ∧ - flatten indent (Parenthesis p) s = "(" ++ flatten (indent ++ " ") p (")" ++ s) ∧ - flatten indent (Str t) s = t ++ s ∧ - flatten indent (Append p1 b p2) s = - flatten indent p1 ((if b then indent else " ") ++ flatten indent p2 s) -End - -Definition v2str_def: - v2str v = flatten "\n" (smart_remove 0 0 (annotate (v2pretty v))) "" -End - -Definition is_comment_def: - is_comment [] = T ∧ - is_comment (c::cs) = - if c = CHR 35 then - (case dropWhile (λx. x ≠ CHR 10) cs of - | [] => F - | (c::cs) => is_comment cs) - else if c = CHR 10 then is_comment cs else F -Termination - WF_REL_TAC ‘measure LENGTH’ \\ rw [] - \\ rename [‘dropWhile f xs’] - \\ qspecl_then [‘f’,‘xs’] mp_tac LENGTH_dropWhile_LESS_EQ - \\ fs [] -End - -Definition vs2str_def: - vs2str [] coms = "\n" ∧ - vs2str (x::xs) coms = - ((case coms of [] => [] | (c::cs) => if is_comment c then c else []) ++ - ("\n" ++ (v2str x ++ ("\n" ++ vs2str xs (case coms of [] => [] | c::cs => cs))))) -End - - diff --git a/candle/standard/ml_kernel/lisp/lisp_valuesScript.sml b/candle/standard/ml_kernel/lisp/lisp_valuesScript.sml deleted file mode 100644 index f85b919860..0000000000 --- a/candle/standard/ml_kernel/lisp/lisp_valuesScript.sml +++ /dev/null @@ -1,135 +0,0 @@ -(* - Definition of simple Lisp s-expressions -*) -Theory lisp_values -Ancestors - arithmetic list string -Libs - term_tactic - -(* Values in the source semantics are binary trees where the - leaves are natural numbers (num) *) -Datatype: - lisp_v = Pair lisp_v lisp_v | Num num -End - -(* Since strings are not in the representation, we have a function that - coverts strings into numbers. Note that parsing and pretty printing - is set up so that printing reproduces these strings when possible. *) -Definition name_def: - name [] = 0 ∧ - name (c::cs) = ORD c * 256 ** (LENGTH cs) + name cs -End - -Overload Name = “λn. Num (name n)” - -(* Lists are terminated with Num 0 *) -Definition list_def[simp]: - list [] = Num 0 ∧ - list (x::xs) = Pair x (list xs) -End - -(* various convenience functions below, most are automatic rewrites [simp] *) - -Definition less_def[simp]: - less (Num n) (Num m) <=> n < m -End - -Definition plus_def[simp]: - plus (Num n) (Num m) = Num (n + m) -End - -Definition minus_def[simp]: - minus (Num n) (Num m) = Num (n - m) -End - -Definition div_def[simp]: - div (Num n) (Num m) = Num (n DIV m) -End - -Definition head_def[simp]: - head (Pair x y) = x ∧ - head v = v -End - -Definition tail_def[simp]: - tail (Pair x y) = y ∧ - tail v = v -End - -Definition cons_def[simp]: - cons x y = Pair x y -End - -Definition bool_def[simp]: - bool T = Num 1 ∧ - bool F = Num 0 -End - -Definition map_def[simp]: - map f xs = list (MAP f xs) -End - -Overload "list" = “map”; - -Definition pair_def[simp]: - pair f g (x,y) = Pair (f x) (g y) -End - -Definition option_def[simp]: - option f NONE = list [] ∧ - option f (SOME x) = list [f x] -End - -Definition char_def[simp]: - char c = Num (ORD c) -End - -Definition isNum_def[simp]: - isNum (Num n) = T ∧ isNum _ = F -End - -Definition getNum_def[simp]: - getNum (Num n) = n ∧ - getNum _ = 0 -End - -Definition el1_def[simp]: - el1 v = head (tail v) -End - -Definition el2_def[simp]: - el2 v = el1 (tail v) -End - -Definition el3_def[simp]: - el3 v = el2 (tail v) -End - -Overload isNil[inferior] = “isNum”; -Overload el0[inferior] = “head”; - -Theorem isNum_bool[simp]: - isNum (bool b) -Proof - Cases_on ‘b’ \\ EVAL_TAC -QED - -Theorem lisp_v_size_def[simp,allow_rebind] = fetch "-" "lisp_v_size_def"; - -Theorem all_macro_defs = LIST_CONJ [list_def, cons_def, bool_def, - map_def, pair_def, option_def]; - -Definition is_upper_def: - (* checks whether string (represented as num) starts with uppercase letter *) - is_upper n = - if n < 256:num then - if n < 65 (* ord A = 65 *) then F else - if n < 91 (* ord Z = 90 *) then T else F - else is_upper (n DIV 256) -End - -Definition otherwise_def[simp]: - otherwise x = x -End - diff --git a/candle/standard/ml_kernel/lisp/readmePrefix b/candle/standard/ml_kernel/lisp/readmePrefix deleted file mode 100644 index 12183b9df2..0000000000 --- a/candle/standard/ml_kernel/lisp/readmePrefix +++ /dev/null @@ -1 +0,0 @@ -Parsing and pretty printing of simple s-expressions diff --git a/candle/standard/ml_kernel/print_thmScript.sml b/candle/standard/ml_kernel/print_thmScript.sml index 16546ed3ce..553231fee1 100644 --- a/candle/standard/ml_kernel/print_thmScript.sml +++ b/candle/standard/ml_kernel/print_thmScript.sml @@ -3,171 +3,128 @@ *) Theory print_thm Ancestors - holKernel mlstring lisp_values lisp_parsing lisp_printing - lisp_parsing_proofs + holKernel mlstring mlint mlsexp Libs preamble (* encoding into v *) -Definition nil_list_def[simp]: - nil_list [] = Name "nil" ∧ - nil_list (x::xs) = Pair x (nil_list xs) -End - -Definition str_to_v_def: - str_to_v (s:mlstring) = - let xs = explode s in - let n = name xs in - if ~NULL xs ∧ name2str n = xs then - Num n - else - list [Num (name "'"); list (MAP (λc. Num (ORD c)) xs)] -End - Definition ty_to_v_def: - ty_to_v (Tyvar s) = list [Name "Tyvar"; str_to_v s] ∧ - ty_to_v (Tyapp s tys) = - list (Name "Tyapp" :: str_to_v s :: (MAP ty_to_v tys)) + ty_to_v (Tyvar s) = Expr [Atom «Tyvar»; Atom s] ∧ + ty_to_v (Tyapp s tys) = Expr (Atom «Tyapp» :: Atom s :: (MAP ty_to_v tys)) Termination WF_REL_TAC ‘measure type_size’ End Definition term_to_v_def: - term_to_v (Var s ty) = list [Name "Var"; str_to_v s; ty_to_v ty] ∧ - term_to_v (Const s ty) = list [Name "Const"; str_to_v s; ty_to_v ty] ∧ - term_to_v (Comb f x) = list [Name "Comb"; term_to_v f; term_to_v x] ∧ - term_to_v (Abs f x) = list [Name "Abs"; term_to_v f; term_to_v x] + term_to_v (Var s ty) = Expr [Atom «Var»; Atom s; ty_to_v ty] ∧ + term_to_v (Const s ty) = Expr [Atom «Const»; Atom s; ty_to_v ty] ∧ + term_to_v (Comb f x) = Expr [Atom «Comb»; term_to_v f; term_to_v x] ∧ + term_to_v (Abs f x) = Expr [Atom «Abs»; term_to_v f; term_to_v x] End Definition thm_to_v_def: thm_to_v (Sequent ts t) = - list [Name "Sequent"; nil_list (MAP term_to_v ts); term_to_v t] + Expr [Atom «Sequent»; Expr (MAP term_to_v ts); term_to_v t] End Definition update_to_v_def: update_to_v (ConstSpec xs t) = - list [Name "ConstSpec"; - list (MAP (λ(s,t). list [str_to_v s; term_to_v t]) xs); + Expr [Atom «ConstSpec»; + Expr (MAP (λ(s,t). Expr [Atom s; term_to_v t]) xs); term_to_v t] ∧ update_to_v (TypeDefn s1 t s2 s3) = - list [Name "TypeDefn"; - str_to_v s1; term_to_v t; str_to_v s2; str_to_v s3] ∧ + Expr [Atom «TypeDefn»; + Atom s1; term_to_v t; Atom s2; Atom s3] ∧ update_to_v (NewType s n) = - list [Name "NewType"; str_to_v s; Num n] ∧ + Expr [Atom «NewType»; Atom s; Atom (mlint$num_to_str n)] ∧ update_to_v (NewConst s ty) = - list [Name "NewConst"; str_to_v s; ty_to_v ty] ∧ + Expr [Atom «NewConst»; Atom s; ty_to_v ty] ∧ update_to_v (NewAxiom t) = - list [Name "NewAxiom"; term_to_v t] + Expr [Atom «NewAxiom»; term_to_v t] End (* decoding from v *) -Definition v2list_def[simp]: - v2list (Num n) = [] ∧ - v2list (Pair x y) = x :: v2list y -End - -Definition v_to_str_def: - v_to_str (Num n) = implode (name2str n) ∧ - v_to_str x = implode (MAP (λv. CHR (getNum v)) (v2list (el1 x))) -End - -Theorem v2list_lisp_v_size[local]: - ∀x a. MEM a (v2list x) ⇒ lisp_v_size a < lisp_v_size x +Theorem MEM_IMP_sexp_size_LESS: + ∀xs a. MEM a xs ⇒ sexp_size a < list_size sexp_size xs Proof - Induct \\ simp [Once v2list_def] \\ rw [] \\ fs [] \\ res_tac \\ fs [] + Induct \\ rw [] \\ simp [] \\ res_tac \\ fs [] QED +Definition v_to_str_def[simp]: + v_to_str (Atom s) = s +End + Definition v_to_ty_def: - v_to_ty v = - let n = getNum (head v) in - if n = name "Tyapp" then - Tyapp (v_to_str (el1 v)) (MAP v_to_ty (v2list (tail (tail v)))) - else Tyvar (v_to_str (el1 v)) + v_to_ty (Expr xs) = + if HD xs = Atom «Tyvar» then + Tyvar (v_to_str (EL 1 xs)) + else if LENGTH xs < 2 then ARB else + Tyapp (v_to_str (EL 1 xs)) (MAP v_to_ty (TL (TL xs))) Termination - WF_REL_TAC ‘measure lisp_v_size’ - \\ rw [isNum_def,head_def,tail_def,lisp_v_size_def] - \\ imp_res_tac v2list_lisp_v_size - \\ Cases_on ‘v’ \\ fs [] - \\ Cases_on ‘l0’ \\ fs [] + WF_REL_TAC ‘measure sexp_size’ + \\ Cases \\ simp [] + \\ Cases_on ‘t’ \\ simp [] \\ rw [] + \\ imp_res_tac MEM_IMP_sexp_size_LESS \\ fs [] End Definition v_to_term_def: - v_to_term v = - let n = getNum (head v) in - if n = name "Comb" ∧ ~isNum v then - Comb (v_to_term (el1 v)) (v_to_term (el2 v)) - else if n = name "Abs" ∧ ~isNum v then - Abs (v_to_term (el1 v)) (v_to_term (el2 v)) - else if n = name "Var" ∧ ~isNum v then - Var (v_to_str (el1 v)) (v_to_ty (el2 v)) - else - Const (v_to_str (el1 v)) (v_to_ty (el2 v)) + v_to_term (Expr xs) = + if LENGTH xs < 3 then ARB + else if HD xs = Atom «Comb» then + Comb (v_to_term (EL 1 xs)) (v_to_term (EL 2 xs)) + else if HD xs = Atom «Abs» then + Abs (v_to_term (EL 1 xs)) (v_to_term (EL 2 xs)) + else if HD xs = Atom «Var» then + Var (v_to_str (EL 1 xs)) (v_to_ty (EL 2 xs)) + else + Const (v_to_str (EL 1 xs)) (v_to_ty (EL 2 xs)) Termination - WF_REL_TAC ‘measure lisp_v_size’ - \\ rw [isNum_def,head_def,tail_def,lisp_v_size_def] - \\ Cases_on ‘v’ \\ fs [] - \\ Cases_on ‘l0’ \\ fs [] - \\ Cases_on ‘l0'’ \\ fs [] + WF_REL_TAC ‘measure sexp_size’ \\ rw [] + \\ rpt (rename [‘LENGTH xs’] \\ Cases_on ‘xs’ \\ gvs []) End Definition v_to_thm_def: - v_to_thm v = - Sequent (MAP v_to_term (v2list (el1 v))) (v_to_term (el2 v)) + v_to_thm (Expr xs) = + Sequent + (case EL 1 xs of Expr ys => MAP v_to_term ys) + (v_to_term (EL 2 xs)) End Definition v_to_update_def: - v_to_update v = - let n = getNum (head v) in - if n = name "ConstSpec" then - ConstSpec (MAP ((λxs. (v_to_str (HD xs), v_to_term (EL 1 xs))) o v2list) - (v2list (el1 v))) (v_to_term (el2 v)) - else if n = name "TypeDefn" then - TypeDefn (v_to_str (el1 v)) (v_to_term (el2 v)) - (v_to_str (el3 v)) (v_to_str (el3 (tail v))) - else if n = name "NewType" then - NewType (v_to_str (el1 v)) (getNum (el2 v)) - else if n = name "NewConst" then - NewConst (v_to_str (el1 v)) (v_to_ty (el2 v)) - else - NewAxiom (v_to_term (el1 v)) + v_to_update (Expr xs) = + if HD xs = Atom «ConstSpec» then + ConstSpec + (case EL 1 xs of Expr ys => + MAP (λy. case y of Expr zs => + (v_to_str (EL 0 zs), v_to_term (EL 1 zs))) ys) + (v_to_term (EL 2 xs)) + else if HD xs = Atom «TypeDefn» then + TypeDefn (v_to_str (EL 1 xs)) + (v_to_term (EL 2 xs)) + (v_to_str (EL 3 xs)) + (v_to_str (EL 4 xs)) + else if HD xs = Atom «NewType» then + NewType (v_to_str (EL 1 xs)) (THE (fromNatString (v_to_str (EL 2 xs)))) + else if HD xs = Atom «NewConst» then + NewConst (v_to_str (EL 1 xs)) (v_to_ty (EL 2 xs)) + else + NewAxiom (v_to_term (EL 1 xs)) End -Theorem v2list_nil_thm[simp]: - ∀xs. v2list (nil_list xs) = xs -Proof - Induct \\ simp [Once v2list_def] -QED - -Theorem v2list_thm[simp]: - ∀xs. v2list (list xs) = xs -Proof - Induct \\ simp [Once v2list_def] -QED - -Theorem str_to_v_thm[simp]: - ∀s. v_to_str (str_to_v s) = s -Proof - Cases \\ fs [str_to_v_def] - \\ rw [v_to_str_def,mlstringTheory.implode_def] - \\ fs [MAP_MAP_o,miscTheory.MAP_EQ_ID,CHR_ORD] -QED - Theorem ty_to_v_thm[simp]: ∀ty. v_to_ty (ty_to_v ty) = ty Proof ho_match_mp_tac ty_to_v_ind \\ rw [] - \\ simp [ty_to_v_def,Once v_to_ty_def] - \\ fs [name_def,MAP_MAP_o,miscTheory.MAP_EQ_ID] + \\ simp [ty_to_v_def,v_to_ty_def] + \\ fs [MAP_MAP_o,miscTheory.MAP_EQ_ID] QED Theorem term_to_v_thm[simp]: ∀t. v_to_term (term_to_v t) = t Proof - Induct - \\ simp [term_to_v_def,Once v_to_term_def] - \\ fs [name_def,MAP_MAP_o,miscTheory.MAP_EQ_ID] + Induct \\ simp [term_to_v_def,Once v_to_term_def] QED Theorem thm_to_v_thm[simp]: @@ -175,7 +132,7 @@ Theorem thm_to_v_thm[simp]: Proof Cases \\ simp [thm_to_v_def,Once v_to_thm_def] - \\ fs [name_def,MAP_MAP_o,miscTheory.MAP_EQ_ID] + \\ fs [MAP_MAP_o,miscTheory.MAP_EQ_ID] QED Theorem update_to_v_update[simp]: @@ -183,34 +140,62 @@ Theorem update_to_v_update[simp]: Proof Cases \\ simp [update_to_v_def,Once v_to_update_def] - \\ fs [name_def,MAP_MAP_o,miscTheory.MAP_EQ_ID,FORALL_PROD] - \\ rpt (simp [Once v2list_def]) + \\ fs [MAP_MAP_o,miscTheory.MAP_EQ_ID,FORALL_PROD] QED (* main definition *) Definition thm_to_string_def: thm_to_string (ctxt:update list) (th:thm) = - let vs = thm_to_v th :: MAP update_to_v ctxt in - implode (vs2str vs - ["# The following is a theorem of higher-order logic\n"; - "\n# which is proved in the following context\n"]) + concat + ([strlit "# The following is a theorem of higher-order logic\n\n"] ++ + [sexp_to_pretty_string (thm_to_v th)] ++ + [strlit "\n# which is proved in the following context\n"] ++ + FLAT (MAP (λdef. [«\n»; sexp_to_pretty_string (update_to_v def)]) ctxt)) End (* it has an inverse: *) +Definition char_list_to_defs_def: + char_list_to_defs input = + case mlsexp$parse input of + | INL _ => [] + | INR (x,rest) => v_to_update x :: char_list_to_defs rest +Termination + WF_REL_TAC ‘measure LENGTH’ \\ rw [] + \\ drule parse_IMP_LENGTH_LESS \\ simp [] +End + Definition string_to_thm_def: string_to_thm s = - let vs = v2list (parse (lexer (explode s)) (Num 0) []) in - (MAP v_to_update (TL vs), v_to_thm (HD vs)) + let rest = dropWhile (λc. c ≠ #"\n") (explode s) in + let (th_v, rest) = OUTR $ mlsexp$parse rest in + let rest = dropWhile (λc. c ≠ #"\n") (TL (TL rest)) in + (char_list_to_defs rest, v_to_thm th_v) End +Theorem to_explode[local]: + (case s of strlit t => t) = explode s +Proof + Cases_on ‘s’ \\ simp [] +QED + Theorem string_to_thm_thm_to_string: string_to_thm (thm_to_string ctxt th) = (ctxt,th) Proof - fs [parse_lexer_vs2str,thm_to_string_def,string_to_thm_def] - \\ once_rewrite_tac [v2list_def] \\ fs [] - \\ fs [MAP_MAP_o,miscTheory.MAP_EQ_ID,FORALL_PROD] + fs [thm_to_string_def,concat_append] + \\ simp [string_to_thm_def,parse_space] + \\ simp_tac std_ss [GSYM APPEND_ASSOC] + \\ rewrite_tac [parse_sexp_to_pretty_string, OUTR] + \\ simp_tac std_ss [thm_to_v_thm] \\ simp [] + \\ simp [Once char_list_to_defs_def,parse_space] + \\ simp [GSYM char_list_to_defs_def] + \\ Induct_on ‘ctxt’ >- EVAL_TAC + \\ rw [] \\ fs [concat_def,to_explode] + \\ simp [Once char_list_to_defs_def,parse_space] + \\ rewrite_tac [parse_sexp_to_pretty_string] \\ simp [] + \\ simp [Once char_list_to_defs_def,parse_space] + \\ simp [GSYM char_list_to_defs_def] QED Theorem thm_to_string_injective: @@ -243,4 +228,3 @@ val _ = |> concl |> rand |> rand |> stringSyntax.fromHOLstring |> print; end - From 8b8761eb8eb3409f53259f9384d154b0c0458ff6 Mon Sep 17 00:00:00 2001 From: Magnus Myreen Date: Tue, 16 Dec 2025 23:03:01 +0100 Subject: [PATCH 37/42] Fixes for changes to Candle theorem printing --- candle/prover/candle_kernelProgScript.sml | 38 ---- candle/prover/candle_kernel_permsScript.sml | 193 ++++++++++++-------- developers/build-sequence | 1 - 3 files changed, 115 insertions(+), 117 deletions(-) diff --git a/candle/prover/candle_kernelProgScript.sml b/candle/prover/candle_kernelProgScript.sml index 503c22e725..4911dcf772 100644 --- a/candle/prover/candle_kernelProgScript.sml +++ b/candle/prover/candle_kernelProgScript.sml @@ -7,7 +7,6 @@ Libs basisFunctionsLib runtime_checkLib Ancestors ml_hol_kernel_funsProg compute print_thm - (* lisp: *) lisp_parsing lisp_values lisp_printing (* compute: *) compute_syntax compute_eval compute_pmatch runtime_check @@ -17,47 +16,10 @@ val _ = (use_long_names := false); val _ = ml_prog_update open_local_block; -val r = translate lisp_valuesTheory.name_def; -val r = translate lisp_printingTheory.num2ascii_def; -val r = translate lisp_printingTheory.ascii_name_def; - -val lemma = prove(“ascii_name_side v3”, - fs [fetch "-" "ascii_name_side_def"] - \\ fs [Once lisp_printingTheory.num2ascii_def,AllCaseEqs()]) - |> update_precondition; - -val r = translate num2str_def; - -val lemma = prove(“∀n. num2str_side n”, - ho_match_mp_tac lisp_printingTheory.num2str_ind - \\ rw [] \\ simp [Once (fetch "-" "num2str_side_def")] - \\ rw [] \\ gvs [DIV_LT_X] - \\ ‘n MOD 10 < 10’ by fs [] - \\ decide_tac) - |> update_precondition; - -val r = translate lisp_printingTheory.name2str_def; -val r = translate lisp_valuesTheory.list_def; -val r = translate nil_list_def; -val r = translate str_to_v_def; val r = translate ty_to_v_def; val r = translate term_to_v_def; val r = translate thm_to_v_def; val r = translate update_to_v_def; -val r = translate dest_quote_def; -val r = translate dest_list_def; -val r = translate newlines_def; -val r = translate v2pretty_def; -val r = translate get_size_def; -val r = translate get_next_size_def; -val r = translate annotate_def; -val r = translate remove_all_def; -val r = translate smart_remove_def; -val r = translate flatten_def; -val r = translate dropWhile_def; -val r = translate is_comment_def; -val r = translate v2str_def; -val r = translate vs2str_def; val r = translate thm_to_string_def; val _ = ml_prog_update open_local_in_block; diff --git a/candle/prover/candle_kernel_permsScript.sml b/candle/prover/candle_kernel_permsScript.sml index 9bc44930f0..d53f24b203 100644 --- a/candle/prover/candle_kernel_permsScript.sml +++ b/candle/prover/candle_kernel_permsScript.sml @@ -663,52 +663,48 @@ Proof \\ rw[] QED -Theorem perms_ok_dropwhile_v[simp]: - perms_ok ps dropwhile_v -Proof - rw[perms_ok_def, dropwhile_v_def, astTheory.pat_bindings_def, perms_ok_env_def] - \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] -QED - Theorem perms_ok_flatten_v[simp]: perms_ok ps flatten_v Proof - rw[perms_ok_def, flatten_v_def, astTheory.pat_bindings_def, perms_ok_env_def] - \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] + rw[perms_ok_def, SexpProgTheory.flatten_v_def, astTheory.pat_bindings_def, + perms_ok_env_def] \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] QED Theorem perms_ok_get_size_v[simp]: perms_ok ps get_size_v Proof - rw[perms_ok_def, get_size_v_def, astTheory.pat_bindings_def, perms_ok_env_def] - \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] + rw[perms_ok_def, SexpProgTheory.get_size_v_def, astTheory.pat_bindings_def, + perms_ok_env_def] \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] QED Theorem perms_ok_get_next_size_v[simp]: perms_ok ps get_next_size_v Proof - rw[perms_ok_def, get_next_size_v_def, astTheory.pat_bindings_def, perms_ok_env_def] + rw[perms_ok_def, SexpProgTheory.get_next_size_v_def, astTheory.pat_bindings_def, + perms_ok_env_def] \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] QED Theorem perms_ok_remove_all_v[simp]: perms_ok ps remove_all_v Proof - rw[perms_ok_def, remove_all_v_def, astTheory.pat_bindings_def, perms_ok_env_def] + rw[perms_ok_def, SexpProgTheory.remove_all_v_def, astTheory.pat_bindings_def, + perms_ok_env_def] \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] QED Theorem perms_ok_smart_remove_v[simp]: perms_ok ps smart_remove_v Proof - rw[perms_ok_def, smart_remove_v_def, astTheory.pat_bindings_def, perms_ok_env_def] + rw[perms_ok_def, SexpProgTheory.smart_remove_v_def, astTheory.pat_bindings_def, + perms_ok_env_def] \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] QED Theorem perms_ok_annotate_v[simp]: perms_ok ps annotate_v Proof - rw[perms_ok_def, annotate_v_def, astTheory.pat_bindings_def, perms_ok_env_def] + rw[perms_ok_def, SexpProgTheory.annotate_v_def, astTheory.pat_bindings_def, perms_ok_env_def] \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] QED @@ -722,128 +718,177 @@ Proof \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] QED -Theorem perms_ok_name_v[simp]: - perms_ok ps name_v +Theorem perms_ok_newlines_v[simp]: + perms_ok ps newlines_v Proof - rw[perms_ok_def, name_v_def, astTheory.pat_bindings_def, perms_ok_env_def] + rw[perms_ok_def, SexpProgTheory.newlines_v_def, astTheory.pat_bindings_def, perms_ok_env_def] \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] QED -Theorem perms_ok_name2str_v[simp]: - perms_ok ps name2str_v +Theorem perms_ok_v2pretty_v[simp]: + perms_ok ps v2pretty_v Proof - rw[perms_ok_def, name2str_v_def, astTheory.pat_bindings_def, perms_ok_env_def] + rw[perms_ok_def, SexpProgTheory.v2pretty_v_def, astTheory.pat_bindings_def, perms_ok_env_def] + \\ gvs [] \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] +QED + +Theorem perms_ok_str_tree_to_strs_v[simp]: + perms_ok ps str_tree_to_strs_v +Proof + rw[perms_ok_def, SexpProgTheory.str_tree_to_strs_v_def, astTheory.pat_bindings_def, + perms_ok_env_def] \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] - \\ rw[perms_ok_def, ascii_name_v_def, astTheory.pat_bindings_def, perms_ok_env_def, - ListProgTheory.hd_v_def,num2str_v_def] +QED + +Theorem perms_ok_null_v[simp]: + perms_ok ps ListProg$null_v +Proof + rw[perms_ok_def, ListProgTheory.null_v_def, astTheory.pat_bindings_def, perms_ok_env_def] \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] - \\ rw[ListProgTheory.hd_v_def,perms_ok_def, - num2ascii_v_def, astTheory.pat_bindings_def, perms_ok_env_def] +QED + +Theorem perms_ok_flat_v[simp]: + perms_ok ps ListProg$flat_v +Proof + rw[perms_ok_def, ListProgTheory.flat_v_def, astTheory.pat_bindings_def, perms_ok_env_def] \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] QED -Theorem perms_ok_dest_quote_v[simp]: - perms_ok ps dest_quote_v +Theorem perms_ok_exp_for_dec_enc_v[simp]: + perms_ok ps exp_for_dec_enc_v Proof - rw[perms_ok_def, dest_quote_v_def, astTheory.pat_bindings_def, perms_ok_env_def] + simp [perms_ok_def, + IntProgTheory.exp_for_dec_enc_v_thm |> CONV_RULE EVAL] +QED + +Theorem perms_ok_tochar_v[simp]: + perms_ok ps IntProg$tochar_v +Proof + rw[perms_ok_def, IntProgTheory.tochar_v_def, + astTheory.pat_bindings_def, perms_ok_env_def] \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] QED -Theorem perms_ok_dest_list_v[simp]: - perms_ok ps dest_list_v +Theorem perms_ok_num_to_chars_v[simp]: + perms_ok ps IntProg$num_to_chars_v Proof - rw[perms_ok_def, dest_list_v_def, astTheory.pat_bindings_def, perms_ok_env_def] + rw[perms_ok_def, IntProgTheory.num_to_chars_v_def, + astTheory.pat_bindings_def, perms_ok_env_def] \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] QED -Theorem perms_ok_newlines_v[simp]: - perms_ok ps newlines_v +Theorem perms_ok_int_to_string_v[simp]: + perms_ok ps IntProg$int_to_string_v Proof - rw[perms_ok_def, newlines_v_def, astTheory.pat_bindings_def, perms_ok_env_def] + rw[perms_ok_def, IntProgTheory.int_to_string_v_def, + astTheory.pat_bindings_def, perms_ok_env_def] \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] QED -Theorem perms_ok_v2pretty_v[simp]: - perms_ok ps v2pretty_v +Theorem perms_ok_tostring_v[simp]: + perms_ok ps IntProg$tostring_v Proof - rw[perms_ok_def, v2pretty_v_def, astTheory.pat_bindings_def, perms_ok_env_def] + rw[perms_ok_def, IntProgTheory.tostring_v_def, astTheory.pat_bindings_def, perms_ok_env_def] \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] QED -Theorem perms_ok_v2str_v[simp]: - perms_ok ps v2str_v +Theorem perms_ok_ty_to_v_v[simp]: + perms_ok ps ty_to_v_v Proof - rw[perms_ok_def, v2str_v_def, astTheory.pat_bindings_def, perms_ok_env_def] + rw[perms_ok_def, ty_to_v_v_def, astTheory.pat_bindings_def, perms_ok_env_def] \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] QED -Theorem perms_ok_is_comment_v[simp]: - perms_ok ps is_comment_v +Theorem perms_ok_term_to_v_v[simp]: + perms_ok ps term_to_v_v Proof - rw[perms_ok_def, is_comment_v_def, astTheory.pat_bindings_def, perms_ok_env_def] + rw[perms_ok_def, term_to_v_v_def, astTheory.pat_bindings_def, perms_ok_env_def] \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] QED -Theorem perms_ok_vs2str_v[simp]: - perms_ok ps vs2str_v +Theorem perms_ok_thm_to_v_v[simp]: + perms_ok ps thm_to_v_v Proof - rw[perms_ok_def, vs2str_v_def, astTheory.pat_bindings_def, perms_ok_env_def] + rw[perms_ok_def, thm_to_v_v_def, astTheory.pat_bindings_def, perms_ok_env_def] \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] QED -Theorem perms_ok_list_v[simp]: - perms_ok ps list_v +Theorem perms_ok_update_to_v_v[simp]: + perms_ok ps update_to_v_v Proof - rw[perms_ok_def, list_v_def, astTheory.pat_bindings_def, perms_ok_env_def] + rw[perms_ok_def, update_to_v_v_def, astTheory.pat_bindings_def, perms_ok_env_def] \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] QED -Theorem perms_ok_nil_list_v[simp]: - perms_ok ps nil_list_v +Theorem perms_ok_str_every_v[simp]: + perms_ok ps str_every_v Proof - rw[perms_ok_def, nil_list_v_def, astTheory.pat_bindings_def, perms_ok_env_def] + rw[perms_ok_def, SexpProgTheory.str_every_v_def, + astTheory.pat_bindings_def, perms_ok_env_def] \\ gvs [] \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] QED -Theorem perms_ok_null_v[simp]: - perms_ok ps ListProg$null_v +Theorem perms_ok_isspace_v[simp]: + perms_ok ps isspace_v Proof - rw[perms_ok_def, ListProgTheory.null_v_def, astTheory.pat_bindings_def, perms_ok_env_def] + rw[perms_ok_def, CharProgTheory.isspace_v_def, + astTheory.pat_bindings_def, perms_ok_env_def] \\ gvs [] \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] QED -Theorem perms_ok_str_to_v_v[simp]: - perms_ok ps str_to_v_v +Theorem perms_ok_is_safe_char_v[simp]: + perms_ok ps is_safe_char_v Proof - rw[perms_ok_def, str_to_v_v_def, astTheory.pat_bindings_def, perms_ok_env_def] + rw[perms_ok_def, SexpProgTheory.is_safe_char_v_def, + astTheory.pat_bindings_def, perms_ok_env_def] \\ gvs [] \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] QED -Theorem perms_ok_ty_to_v_v[simp]: - perms_ok ps ty_to_v_v +Theorem perms_ok_char_escape_seq_v[simp]: + perms_ok ps char_escape_seq_v Proof - rw[perms_ok_def, ty_to_v_v_def, astTheory.pat_bindings_def, perms_ok_env_def] + rw[perms_ok_def, StringProgTheory.char_escape_seq_v_def, + astTheory.pat_bindings_def, perms_ok_env_def] \\ gvs [] \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] QED -Theorem perms_ok_term_to_v_v[simp]: - perms_ok ps term_to_v_v +Theorem perms_ok_char_escaped_v[simp]: + perms_ok ps char_escaped_v Proof - rw[perms_ok_def, term_to_v_v_def, astTheory.pat_bindings_def, perms_ok_env_def] + rw[perms_ok_def, StringProgTheory.char_escaped_v_def, + astTheory.pat_bindings_def, perms_ok_env_def] \\ gvs [] \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] QED -Theorem perms_ok_thm_to_v_v[simp]: - perms_ok ps thm_to_v_v +Theorem perms_ok_escape_str_v[simp]: + perms_ok ps escape_str_v Proof - rw[perms_ok_def, thm_to_v_v_def, astTheory.pat_bindings_def, perms_ok_env_def] + rw[perms_ok_def, StringProgTheory.escape_str_v_def, + astTheory.pat_bindings_def, perms_ok_env_def] \\ gvs [] \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] QED -Theorem perms_ok_update_to_v_v[simp]: - perms_ok ps update_to_v_v +Theorem perms_ok_make_str_safe_v[simp]: + perms_ok ps make_str_safe_v Proof - rw[perms_ok_def, update_to_v_v_def, astTheory.pat_bindings_def, perms_ok_env_def] + rw[perms_ok_def, SexpProgTheory.make_str_safe_v_def, + astTheory.pat_bindings_def, perms_ok_env_def] \\ gvs [] + \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] +QED + +Theorem perms_ok_sexp2tree_v[simp]: + perms_ok ps sexp2tree_v +Proof + rw[perms_ok_def, SexpProgTheory.sexp2tree_v_def, + astTheory.pat_bindings_def, perms_ok_env_def] \\ gvs [] + \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] +QED + +Theorem perms_ok_sexp_to_pretty_string_v[simp]: + perms_ok ps sexp_to_pretty_string_v +Proof + rw[perms_ok_def, SexpProgTheory.sexp_to_pretty_string_v_def, + astTheory.pat_bindings_def, perms_ok_env_def] \\ pop_assum mp_tac \\ eval_nsLookup_tac \\ rw [] \\ fs [] QED @@ -1212,14 +1257,6 @@ Proof \\ rw[] QED -Theorem perms_ok_flat_v[simp]: - perms_ok ps ListProg$flat_v -Proof - rw[perms_ok_def, ListProgTheory.flat_v_def, astTheory.pat_bindings_def, perms_ok_env_def] - \\ pop_assum mp_tac \\ eval_nsLookup_tac - \\ rw[] -QED - Theorem perms_ok_check_var_v[simp]: perms_ok ps check_var_v Proof diff --git a/developers/build-sequence b/developers/build-sequence index d9f4a17f8d..a6c2f09214 100644 --- a/developers/build-sequence +++ b/developers/build-sequence @@ -94,7 +94,6 @@ candle/standard/syntax candle/standard/semantics candle/standard/monadic candle/standard/ml_kernel -candle/standard/ml_kernel/lisp candle/overloading candle/overloading/syntax candle/overloading/semantics From cd277c48afbd72fe2071b571c53ddc32e352dac1 Mon Sep 17 00:00:00 2001 From: Daniel Nezamabadi <55559979+dnezam@users.noreply.github.com> Date: Wed, 17 Dec 2025 08:55:42 +0100 Subject: [PATCH 38/42] Fix sexp_to_dafny --- compiler/dafny/sexp_to_dafnyScript.sml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/dafny/sexp_to_dafnyScript.sml b/compiler/dafny/sexp_to_dafnyScript.sml index 69a16caac0..adebd975a6 100644 --- a/compiler/dafny/sexp_to_dafnyScript.sml +++ b/compiler/dafny/sexp_to_dafnyScript.sml @@ -33,7 +33,7 @@ Definition to_int_def: to_int (Atom s) = (case fromString s of | NONE => fail «to_int: fromString failed» - | SOME i => return i) ∧ + | SOME (i: int) => return i) ∧ to_int _ = fail «to_int: Was not Atom» End From 27abb4bf32ff72cd5f024c6ec124f67b3b69e044 Mon Sep 17 00:00:00 2001 From: Magnus Myreen Date: Wed, 17 Dec 2025 12:15:32 +0100 Subject: [PATCH 39/42] Remove mention of str_treeTheory --- compiler/bootstrap/translation/explorerProgScript.sml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/compiler/bootstrap/translation/explorerProgScript.sml b/compiler/bootstrap/translation/explorerProgScript.sml index 8a98834b6a..327b2041ef 100644 --- a/compiler/bootstrap/translation/explorerProgScript.sml +++ b/compiler/bootstrap/translation/explorerProgScript.sml @@ -56,9 +56,8 @@ QED val _ = json_to_mlstring_ind |> update_precondition; -(* str_tree and displayLang *) +(* displayLang *) -val r = translate str_treeTheory.v2strs_def; val r = translate displayLangTheory.display_to_str_tree_def; (* presLang *) From caaa9089584b24c2dd30a5324421141b33ee5b18 Mon Sep 17 00:00:00 2001 From: Magnus Myreen Date: Wed, 17 Dec 2025 13:09:54 +0100 Subject: [PATCH 40/42] Remove a now dated example --- examples/Holmakefile | 14 ++++-- examples/README.md | 3 -- examples/lispProgScript.sml | 96 ------------------------------------- 3 files changed, 9 insertions(+), 104 deletions(-) delete mode 100644 examples/lispProgScript.sml diff --git a/examples/Holmakefile b/examples/Holmakefile index 41f507a5fa..5226d0e6fa 100644 --- a/examples/Holmakefile +++ b/examples/Holmakefile @@ -1,8 +1,12 @@ -INCLUDES = $(HOLDIR)/examples/formal-languages/regular $(HOLDIR)/examples/bootstrap \ - $(HOLDIR)/examples/Crypto \ - $(CAKEMLDIR)/developers $(CAKEMLDIR)/misc\ - $(CAKEMLDIR)/semantics $(CAKEMLDIR)/basis/pure $(CAKEMLDIR)/basis\ - $(CAKEMLDIR)/translator $(CAKEMLDIR)/characteristic +INCLUDES = $(HOLDIR)/examples/formal-languages/regular \ + $(HOLDIR)/examples/Crypto \ + $(CAKEMLDIR)/developers \ + $(CAKEMLDIR)/misc \ + $(CAKEMLDIR)/semantics \ + $(CAKEMLDIR)/basis/pure \ + $(CAKEMLDIR)/basis \ + $(CAKEMLDIR)/translator \ + $(CAKEMLDIR)/characteristic all: $(DEFAULT_TARGETS) README.md .PHONY: all diff --git a/examples/README.md b/examples/README.md index d2ab8a4ca1..09afe5c151 100644 --- a/examples/README.md +++ b/examples/README.md @@ -54,9 +54,6 @@ Faster cat: process 2048 chars at a time. [lcsScript.sml](lcsScript.sml): Verification of longest common subsequence algorithms. -[lispProgScript.sml](lispProgScript.sml): -Parsing and pretty printing of s-expressions - [lpr_checker](lpr_checker): An LPR checker built on CakeML diff --git a/examples/lispProgScript.sml b/examples/lispProgScript.sml deleted file mode 100644 index 8da7fced66..0000000000 --- a/examples/lispProgScript.sml +++ /dev/null @@ -1,96 +0,0 @@ -(* - Parsing and pretty printing of s-expressions -*) -Theory lispProg -Libs - preamble basis -Ancestors - misc set_sep list mlstring - (* lisp: *) parsing source_values printing - -val _ = translation_extends "basisProg"; - -val _ = show_assums := true; - -(* --- functions from source_valuesTheory --- *) - -val res = translate name_def; -val res = translate head_def; -val res = translate tail_def; -val res = translate isNum_def; -val res = translate v2list_def; -val res = translate list_def; - -(* --- functions from parsingTheory --- *) - -val res = translate quote_def; -val res = translate parse_def; -val res = translate end_line_def; -val res = translate read_num_def; -val res = translate_no_ind (REWRITE_RULE [MEMBER_INTRO] lex_def); - -Theorem lex_ind[local]: - lex_ind -Proof - rewrite_tac [fetch "-" "lex_ind_def"] - \\ rpt gen_tac - \\ rpt (disch_then strip_assume_tac) - \\ match_mp_tac (latest_ind ()) - \\ rpt strip_tac - \\ last_x_assum match_mp_tac - \\ rpt strip_tac - \\ fs [FORALL_PROD] - \\ gvs[MEMBER_def] -QED - -val _ = update_precondition lex_ind; - -val res = translate lexer_def; - -Definition from_str_def: - from_str s = head (parse (lexer s) (Num 0) []) -End - -val res = translate from_str_def; - -(* --- functions from printingTheory --- *) - -val res = translate num2str_def; - -Theorem num2str_side[local]: - ∀n. num2str_side n -Proof - ho_match_mp_tac num2str_ind \\ rpt strip_tac - \\ once_rewrite_tac [fetch "-" "num2str_side_def"] \\ rw [] - \\ ‘n MOD 10 < 10’ by fs [MOD_LESS, SF SFY_ss] \\ simp [] -QED - -val _ = update_precondition num2str_side; - -val res = translate num2ascii_def; -val res = translate ascii_name_def; - -Theorem ascii_name_side[local]: - ∀n. ascii_name_side n -Proof - rw [fetch "-" "ascii_name_side_def"] - \\ fs [Once num2ascii_def,AllCaseEqs()] -QED - -val _ = update_precondition ascii_name_side; - -val res = translate name2str_def; -val res = translate dest_quote_def; -val res = translate dest_list_def; -val res = translate newlines_def; -val res = translate v2pretty_def; -val res = translate get_size_def; -val res = translate get_next_size_def; -val res = translate annotate_def; -val res = translate remove_all_def; -val res = translate smart_remove_def; -val res = translate flatten_def; -val res = translate dropWhile_def; -val res = translate is_comment_def; -val res = translate v2str_def; -val res = translate vs2str_def; From 8c1133f53843f3bbdf7012ea0c7ab16b2f30015b Mon Sep 17 00:00:00 2001 From: Magnus Myreen Date: Wed, 17 Dec 2025 23:22:04 +0100 Subject: [PATCH 41/42] Fix stamp numbers --- characteristic/examples/cf_examplesScript.sml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/characteristic/examples/cf_examplesScript.sml b/characteristic/examples/cf_examplesScript.sml index 87d22542e4..64fdeda10e 100644 --- a/characteristic/examples/cf_examplesScript.sml +++ b/characteristic/examples/cf_examplesScript.sml @@ -203,10 +203,10 @@ Theorem example_raise_spec[local]: !uv. UNIT_TYPE () uv ==> app (p:'ffi ffi_proj) example_raise_v [uv] - emp (POSTe v. & (v = Conv (SOME (ExnStamp 8)) [])) + emp (POSTe v. & (v = Conv (SOME (ExnStamp 9)) [])) Proof rpt strip_tac \\ xcf' "example_raise" \\ - xlet `POSTv ev. & (ev = Conv (SOME (ExnStamp 8)) [])` + xlet `POSTv ev. & (ev = Conv (SOME (ExnStamp 9)) [])` THEN1 (xcon \\ xsimpl) \\ xraise \\ xsimpl QED @@ -228,9 +228,9 @@ Theorem example_handle_spec[local]: Proof rpt strip_tac \\ xcf' "example_handle" \\ - xhandle `POSTe v. & Foo_exn 9 3 v` + xhandle `POSTe v. & Foo_exn 10 3 v` THEN1 ( - xlet `POSTv v. & Foo_exn 9 3 v` + xlet `POSTv v. & Foo_exn 10 3 v` THEN1 (xcon \\ fs [Foo_exn_def] \\ xsimpl) \\ xraise \\ xsimpl ) \\ @@ -255,7 +255,7 @@ Theorem example_handle2_spec[local]: Proof rpt strip_tac \\ xcf' "example_handle2" \\ xhandle ‘POSTve (\v. & (x > 0 /\ INT 1 v)) - (\e. & (x <= 0 /\ Foo_exn 10 (-1) e))’ + (\e. & (x <= 0 /\ Foo_exn 11 (-1) e))’ THEN1 ( xlet `POSTv bv. & (BOOL (x > 0) bv)` THEN1 (xapp \\ fs []) \\ @@ -265,7 +265,7 @@ Proof irule FALSITY \\ intLib.ARITH_TAC ) THEN1 ( - xlet `POSTv ev. & Foo_exn 10 (-1) ev` + xlet `POSTv ev. & Foo_exn 11 (-1) ev` THEN1 (xret \\ fs [Foo_exn_def] \\ xsimpl) \\ xraise \\ xsimpl \\ intLib.ARITH_TAC ) @@ -469,4 +469,3 @@ Proof xsimpl>> rw[]>>simp[Once even_odd_def] QED - From 475edaf639f97ac56b1bac2d018abb097ba067a3 Mon Sep 17 00:00:00 2001 From: Magnus Myreen Date: Thu, 18 Dec 2025 20:40:33 +0100 Subject: [PATCH 42/42] Revert "Remove a now dated example" This reverts commit caaa9089584b24c2dd30a5324421141b33ee5b18. --- examples/Holmakefile | 14 ++---- examples/README.md | 3 ++ examples/lispProgScript.sml | 96 +++++++++++++++++++++++++++++++++++++ 3 files changed, 104 insertions(+), 9 deletions(-) create mode 100644 examples/lispProgScript.sml diff --git a/examples/Holmakefile b/examples/Holmakefile index 5226d0e6fa..41f507a5fa 100644 --- a/examples/Holmakefile +++ b/examples/Holmakefile @@ -1,12 +1,8 @@ -INCLUDES = $(HOLDIR)/examples/formal-languages/regular \ - $(HOLDIR)/examples/Crypto \ - $(CAKEMLDIR)/developers \ - $(CAKEMLDIR)/misc \ - $(CAKEMLDIR)/semantics \ - $(CAKEMLDIR)/basis/pure \ - $(CAKEMLDIR)/basis \ - $(CAKEMLDIR)/translator \ - $(CAKEMLDIR)/characteristic +INCLUDES = $(HOLDIR)/examples/formal-languages/regular $(HOLDIR)/examples/bootstrap \ + $(HOLDIR)/examples/Crypto \ + $(CAKEMLDIR)/developers $(CAKEMLDIR)/misc\ + $(CAKEMLDIR)/semantics $(CAKEMLDIR)/basis/pure $(CAKEMLDIR)/basis\ + $(CAKEMLDIR)/translator $(CAKEMLDIR)/characteristic all: $(DEFAULT_TARGETS) README.md .PHONY: all diff --git a/examples/README.md b/examples/README.md index 09afe5c151..d2ab8a4ca1 100644 --- a/examples/README.md +++ b/examples/README.md @@ -54,6 +54,9 @@ Faster cat: process 2048 chars at a time. [lcsScript.sml](lcsScript.sml): Verification of longest common subsequence algorithms. +[lispProgScript.sml](lispProgScript.sml): +Parsing and pretty printing of s-expressions + [lpr_checker](lpr_checker): An LPR checker built on CakeML diff --git a/examples/lispProgScript.sml b/examples/lispProgScript.sml new file mode 100644 index 0000000000..8da7fced66 --- /dev/null +++ b/examples/lispProgScript.sml @@ -0,0 +1,96 @@ +(* + Parsing and pretty printing of s-expressions +*) +Theory lispProg +Libs + preamble basis +Ancestors + misc set_sep list mlstring + (* lisp: *) parsing source_values printing + +val _ = translation_extends "basisProg"; + +val _ = show_assums := true; + +(* --- functions from source_valuesTheory --- *) + +val res = translate name_def; +val res = translate head_def; +val res = translate tail_def; +val res = translate isNum_def; +val res = translate v2list_def; +val res = translate list_def; + +(* --- functions from parsingTheory --- *) + +val res = translate quote_def; +val res = translate parse_def; +val res = translate end_line_def; +val res = translate read_num_def; +val res = translate_no_ind (REWRITE_RULE [MEMBER_INTRO] lex_def); + +Theorem lex_ind[local]: + lex_ind +Proof + rewrite_tac [fetch "-" "lex_ind_def"] + \\ rpt gen_tac + \\ rpt (disch_then strip_assume_tac) + \\ match_mp_tac (latest_ind ()) + \\ rpt strip_tac + \\ last_x_assum match_mp_tac + \\ rpt strip_tac + \\ fs [FORALL_PROD] + \\ gvs[MEMBER_def] +QED + +val _ = update_precondition lex_ind; + +val res = translate lexer_def; + +Definition from_str_def: + from_str s = head (parse (lexer s) (Num 0) []) +End + +val res = translate from_str_def; + +(* --- functions from printingTheory --- *) + +val res = translate num2str_def; + +Theorem num2str_side[local]: + ∀n. num2str_side n +Proof + ho_match_mp_tac num2str_ind \\ rpt strip_tac + \\ once_rewrite_tac [fetch "-" "num2str_side_def"] \\ rw [] + \\ ‘n MOD 10 < 10’ by fs [MOD_LESS, SF SFY_ss] \\ simp [] +QED + +val _ = update_precondition num2str_side; + +val res = translate num2ascii_def; +val res = translate ascii_name_def; + +Theorem ascii_name_side[local]: + ∀n. ascii_name_side n +Proof + rw [fetch "-" "ascii_name_side_def"] + \\ fs [Once num2ascii_def,AllCaseEqs()] +QED + +val _ = update_precondition ascii_name_side; + +val res = translate name2str_def; +val res = translate dest_quote_def; +val res = translate dest_list_def; +val res = translate newlines_def; +val res = translate v2pretty_def; +val res = translate get_size_def; +val res = translate get_next_size_def; +val res = translate annotate_def; +val res = translate remove_all_def; +val res = translate smart_remove_def; +val res = translate flatten_def; +val res = translate dropWhile_def; +val res = translate is_comment_def; +val res = translate v2str_def; +val res = translate vs2str_def;