From a19d0cee8160a5ae8b73fc0e7fe6eed763251b4b Mon Sep 17 00:00:00 2001 From: Mikael Mayer Date: Tue, 3 Jan 2023 14:56:45 -0600 Subject: [PATCH 01/22] First commit of the parsers combinator --- src/Parsers/README.md | 84 ++ src/Parsers/exampleArithmetic.dfy | 119 +++ src/Parsers/library.dfy | 53 ++ src/Parsers/parser.dfy | 1250 +++++++++++++++++++++++++++++ 4 files changed, 1506 insertions(+) create mode 100644 src/Parsers/README.md create mode 100644 src/Parsers/exampleArithmetic.dfy create mode 100644 src/Parsers/library.dfy create mode 100644 src/Parsers/parser.dfy diff --git a/src/Parsers/README.md b/src/Parsers/README.md new file mode 100644 index 00000000..846190a0 --- /dev/null +++ b/src/Parsers/README.md @@ -0,0 +1,84 @@ +# Verified Parser Combinator Library in Dafny + +To view a full example of how to use the parser combinator library, +especially how to define a recursive parser that is guaranteed to terminate, +please refer to the file `exampleArithmetic.dfy`, which parses +an arithmetic expression. + +To get started, first you need to import the parser, I recommend: + +``` +include "parser.dfy" +import opened Parsers +import ParserEngine +``` + + +Then, define a class that extends `ParserEngine.Engine` and defines +the input string at the same time: + +``` + class MyParserEngine extends ParserEngine.Engine { + constructor(input: string) { + this.input := input; + } + } +``` + +A parser is a partial function that takes a position and returns a `ParseResult`. Errors have two levels, recoverable or not. + +> ``` +> type Parser<+T> = nat --> ParseResult +> +> datatype ParseResult<+T> = +> | PFailure(level: FailureLevel, message: string, pos: nat) +> | PSuccess(pos: nat, t: T) +> +> datatype FailureLevel = Error | Recoverable +> ``` + +In this class, you can define parsers yourself, or use building blocks. +For example, + +``` + function method ParseId?(): Parser + { + While?((pos: nat) requires pos <= |input| => + pos < |input| && input[pos] in "azertyuiopqsdfghjklmwxcvbnAZERTYUIOPQSDFGHJKLMWXCVBN_7894561230" + ) + } + + function method ParseId(): Parser + { + Or(ParseId?(), Fail("Expected identifier", Error)) + } + + function method ParseField(): Parser + { + Bind(Concat(ParseId(), ConcatR(Const(":"), ParseId())), + (result: (ID, ID), newPos: nat) => Succeed(Field(result.0, result.1))) + } + // It's the same as using Map() instead of Bind(), and removing the "Succeed(" and the `newPos` parameter) + + datatype FieldDeclaration = Field(name: ID, value: ID) +``` + +To invoke your function, define a main method like this: + +``` + +method Main() { + var content = "happy:code"; + + var t := new MyParserEngine(content); + var parseResult := t.ParseField()(0); + if parseResult.PFailure? { + t.ReportError(parseResult); // Nice error reporting message with quoting the line and position of failure, along with the message + return; + } + var result := parseResult.t; + print result; +} +``` + + diff --git a/src/Parsers/exampleArithmetic.dfy b/src/Parsers/exampleArithmetic.dfy new file mode 100644 index 00000000..25034375 --- /dev/null +++ b/src/Parsers/exampleArithmetic.dfy @@ -0,0 +1,119 @@ +include "parser.dfy" + +module {:options "-functionSyntax:4", "-quantifierSyntax:4"} EngineParens { + import opened Parsers + import opened ParserBuilders + import opened ParserEngine + + class EngineParens extends ParserEngine.Engine { + constructor(input: string) { + this.input := input; + } + + const fixmapBase: map> := map[] + + function {:opaque} atom(functions: set): (mapper: ParserMapper) + requires "plus__" in functions + ensures RecSpecOnce("atom", functions, mapper) + { + (rec: ParserSelector, pos: nat) requires RecSpec("atom", functions, rec, pos) => + C?("(").o_I(B(rec("plus__")).I_o(C(")"))) + .O(N().M(n => Number(n))).apply(pos) + } + + function {:opaque} times(functions: set): (mapper: ParserMapper) + requires "atom" in functions + ensures RecSpecOnce("times", functions, mapper) + { + (rec: ParserSelector, pos: nat) requires RecSpec("times", functions, rec, pos) => + Bind(rec("atom"), (expr: Expression, pos': nat) => + RepAcc(Concat(Or(Const?("*"), Or(Const?("/"), Const?("%"))), rec("atom")), expr, Expression.InfixBuilder()))(pos) + } + + function {:opaque} plus(functions: set): (mapper: ParserMapper) + requires "times" in functions + ensures RecSpecOnce("plus__", functions, mapper) + { + (rec: ParserSelector, pos: nat) requires RecSpec("plus__", functions, rec, pos) => + Bind(rec("times"), (expr: Expression, pos': nat) => + RepAcc(Concat(Or(Const?("+"), Const?("-")), rec("times")), expr, Expression.InfixBuilder()))(pos) + } + + /* The DSL makes verification 7/2 slower (7M resource units vs 2M resource units above*/ + /*function {:opaque} plus(functions: set): (mapper: ParserMapper) + requires "times" in functions + ensures FixMapInnerOnce("plus__", mapper, functions, |input|) + { + (rec: ParserSelector, pos: nat) + requires RecSpec("plus__", functions, rec, pos) => + B(rec("times")).Then((expr: Expression, pos': nat) => + C?("+").o_I(B(rec("times"))) + .Repeat(expr, Expression.BinaryBuilder("+"))).apply(pos) + }*/ + + function {:opaque} ExpressionsFix(): (r: ParseResult) + { + var functions := {"atom", "times", "plus__"}; + var underlying := fixmapBase[ + "atom" := atom(functions)][ + "times" := times(functions)][ + "plus__" := plus(functions)]; + FixMap(underlying, "plus__")(0) + } + } + + function NatToString(n: nat): string { + if 0 <= n <= 9 then + ["0123456789"[n]] + else + NatToString(n/10) + NatToString(n%10) + } + + datatype Expression = + | Binary(op: string, left: Expression, right: Expression) + | Number(x: nat) + { + static function BinaryBuilder(op: string): (Expression, Expression) -> Expression + { + (left: Expression, right: Expression) => Binary(op, left, right) + } + static function InfixBuilder(): (Expression, (string, Expression)) -> Expression + { + (left: Expression, right: (string, Expression)) => Binary(right.0, left, right.1) + } + function ToString(level: nat): string + requires level <= 2 + { + match this + case Number(x) => NatToString(x) + case Binary(op, left, right) => + (match level case 0 => "(" case 1 => "[" case 2 => "{") + + left.ToString((level + 1)%3) + op + right.ToString((level + 1) % 3) + + (match level case 0 => ")" case 1 => "]" case 2 => "}") + } + } + + function repeat(str: string, n: nat): (r: string) + ensures |r| == |str| * n + { + if n == 0 then "" + else str + repeat(str, n-1) + } + + method Main(args: seq) { + if |args| <= 1 { + return; + } + for i := 1 to |args| { + var input := args[i]; + var engine := new EngineParens(input); + match engine.ExpressionsFix() { + case PSuccess(_, n) => print "result:", n.ToString(0); + case PFailure(level, error, pos) => print input, "\n"; + print repeat(" ", pos), "^","\n"; + print error; + } + print "\n"; + } + } +} \ No newline at end of file diff --git a/src/Parsers/library.dfy b/src/Parsers/library.dfy new file mode 100644 index 00000000..b0b765e9 --- /dev/null +++ b/src/Parsers/library.dfy @@ -0,0 +1,53 @@ +module {:options "/functionSyntax:4"} Library { + // Library + datatype Option = Some(value: T) | None + datatype Result = Success(value: T) | Failure(s: string, pos: int) { + predicate IsFailure() { + Failure? + } + function PropagateFailure(): Result + requires IsFailure() + { + Failure(s, pos) + } + function Extract(): T + requires !IsFailure() + { + value + } + } +} +module {:options "/functionSyntax:4"} Printer { + + type stringNat = s: string | + |s| > 0 && (|s| > 1 ==> s[0] != '0') && + forall i | 0 <= i < |s| :: s[i] in "0123456789" + witness "1" + + function natToString(n: nat): stringNat { + match n + case 0 => "0" case 1 => "1" case 2 => "2" case 3 => "3" case 4 => "4" + case 5 => "5" case 6 => "6" case 7 => "7" case 8 => "8" case 9 => "9" + case _ => natToString(n / 10) + natToString(n % 10) + } + + function stringToNat(s: stringNat): nat + decreases |s| + { + if |s| == 1 then + match s[0] + case '0' => 0 case '1' => 1 case '2' => 2 case '3' => 3 case '4' => 4 + case '5' => 5 case '6' => 6 case '7' => 7 case '8' => 8 case '9' => 9 + else + stringToNat(s[..|s|-1])*10 + stringToNat(s[|s|-1..|s|]) + } + + lemma natToStringThenStringToNatIdem(n: nat) + ensures stringToNat(natToString(n)) == n + { // Proof is automatic + } + lemma stringToNatThenNatToStringIdem(n: stringNat) + ensures natToString(stringToNat(n)) == n + { // Proof is automatic + } +} \ No newline at end of file diff --git a/src/Parsers/parser.dfy b/src/Parsers/parser.dfy new file mode 100644 index 00000000..b1a5a3d5 --- /dev/null +++ b/src/Parsers/parser.dfy @@ -0,0 +1,1250 @@ +include "library.dfy" + +module {:options "-functionSyntax:4", "-quantifierSyntax:4"} Parsers { + datatype Either<+L, +R> = Left(l: L) | Right(r: R) + // Type to return when using the Or parser + + datatype FailureLevel = Error | Recoverable + // PFailure level for parse results. An Error will be propagated to the top + // while a Recoverable can be caught by a disjunctive pattern. + // For example, if the parser Const?() fails, then it returns a Recoverable, + // but the parser Const() will return an error. + + datatype ParseResult<+T> = + // ParseResult is a failure-compatible type + | PFailure(level: FailureLevel, message: string, pos: nat) + // Returned in a parser failed + | PSuccess(pos: nat, t: T) + // Returns if a parser succeeds + { + predicate IsFailure() { PFailure? } + function PropagateFailure(): ParseResult requires IsFailure() { + match this + case PFailure(level, message, position) => PFailure(level, message, position) + } + function Extract(): (nat, T) requires PSuccess? { (pos, t) } + function MapResult(f: T -> T'): ParseResult { + match this + case PSuccess(n, t) => PSuccess(n, f(t)) + case PFailure(level, message, position) => PFailure(level, message, position) + } + } + + + type Parser<+T> = nat --> ParseResult + // A parser is a function from a position to a parse result. + // It might have preconditions + + // I wanted to use the following to make Parser a subset type + // but this causes issues in verification, so I'm inlining this + // condition wherever needed + ghost predicate IsRegular_(p: Parser, size: nat) { + forall pos: nat | pos <= size :: + && p.requires(pos) + && (p(pos).PSuccess? ==> pos <= p(pos).pos <= size) + } + + type ParserSelector<+T> = string --> Parser + // A parser selector is a function that, given a name that exists, + // returns a parser associated to this name + + type ParserMapper = (ParserSelector, nat) --> ParseResult + // A parser mapper is the building block of recursive parser. Given a parser selector and a position, + // it returns a parsing result. So it's like a parser, but it can also invoke other parsers. + + // Parser combinators. The following functions make it possible to create and compose parsers + + function {:opaque} Succeed_(ghost size: nat, t: T): (p: Parser) + ensures forall pos: nat | pos <= size :: p.requires(pos) && p(pos).PSuccess? && pos == p(pos).pos <= size + // A parser that does not consume any input and returns the given value + { + (pos: nat) requires pos <= size => PSuccess(pos, t) + } + + function {:opaque} Epsilon_(ghost size: nat): (p: Parser<()>) + ensures forall pos: nat | pos <= size :: p.requires(pos) && p(pos).PSuccess? && pos == p(pos).pos + { Succeed_(size, ()) } + + function Fail(message: string, level: FailureLevel := Recoverable): Parser + // A parser that does not consume any input and returns the given failure + { + (pos: nat) => PFailure(level, message, pos) + } + + ghost predicate BindSpec(size: nat, left: Parser, right: (L, nat) --> Parser, pos: nat) + // Encodes the fact that, at position "pos", left and right can be correctly concatenated + { + && left.requires(pos) + && (left(pos).PSuccess? ==> pos <= left(pos).pos <= size) + && (left(pos).PSuccess? ==> + && right.requires(left(pos).t, left(pos).pos) + && var right' := right(left(pos).t, left(pos).pos); + && right'.requires(left(pos).pos) + && (right'(left(pos).pos).PSuccess? ==> left(pos).pos <= right'(left(pos).pos).pos <= size) + ) + } + + function {:opaque} Bind_( + ghost size: nat, + left: Parser, + right: (L, nat) --> Parser + ) : (p: Parser) + // A parser such that, if the left parser succeeds, then the right parser is obtained by using the result of the left parser + ensures forall pos: nat | BindSpec(size, left, right, pos) + :: p.requires(pos) + && (p(pos).PSuccess? ==> + && left(pos).PSuccess? + && left(pos).pos <= p(pos).pos <= size + && p(pos) == right(left(pos).t, left(pos).pos)(left(pos).pos)) + { + (pos: nat) + requires BindSpec(size, left, right, pos) + => + var (pos, l) :- left(pos); + var (pos, r) :- right(l, pos)(pos); + PSuccess(pos, r) + } + + ghost predicate MapSpec(size: nat, underlying: Parser, mappingFunc: T --> U, pos: nat) + // Encodes the proposition that, if the underlying function successfully parses at position 'pos', + // then the mapping function should succeed on the result of the parsing + { + && underlying.requires(pos) + && (underlying(pos).PSuccess? ==> pos <= underlying(pos).pos <= size + && mappingFunc.requires(underlying(pos).t)) + } + + function {:opaque} Map_(ghost size: nat, underlying: Parser, mappingFunc: T --> U) + : (p: Parser) + // A parser combinator that makes it possible to transform the result of a parser in another one + // The mapping function can be partial + ensures forall pos: nat | MapSpec(size, underlying, mappingFunc, pos) + :: && p.requires(pos) + && (p(pos).PSuccess? ==> pos <= p(pos).pos <= size) + { + (pos: nat) requires MapSpec(size, underlying, mappingFunc, pos) + => + var (pos, t) :- underlying(pos); + var u := mappingFunc(t); + PSuccess(pos, u) + } + + ghost predicate ConcatSpec_( + size: nat, + left: Parser, + right: Parser, + pos: nat) + // Encodes the proposition that, if the left parser succeeds on pos, then it should be possible to apply the + // right parser after the left one. + { + && pos <= size + && left.requires(pos) + && (left(pos).PSuccess? ==> + && pos <= left(pos).pos <= size + && right.requires(left(pos).pos) + && (right(left(pos).pos).PSuccess? ==> + left(pos).pos <= right(left(pos).pos).pos <= size) + ) + } + + function {:opaque} Concat_(ghost size: nat, + left: Parser, + right: Parser + ) : (p: Parser<(L, R)>) + // Makes it possible to concatenate two consecutive parsers and return the pair of the results + ensures forall pos: nat | + ConcatSpec_(size, left, right, pos) + :: p.requires(pos) + && (p(pos).PSuccess? ==> pos <= p(pos).pos <= size) + { + (pos: nat) requires ConcatSpec_(size, left, right, pos) + => + var (pos, l) :- left(pos); + var (pos, r) :- right(pos); + PSuccess(pos, (l, r)) + } + + function {:opaque} ConcatR_( + ghost size: nat, + left: Parser, + right: Parser + ) : (p: Parser) + // Same as Concat but only returns the second result + ensures forall pos: nat | + ConcatSpec_(size, left, right, pos) + :: p.requires(pos) + && (p(pos).PSuccess? ==> pos <= p(pos).pos <= size) + { + (pos: nat) + requires left.requires(pos) + requires left(pos).PSuccess? ==> right.requires(left(pos).pos) + => + var (pos, l) :- left(pos); + var (pos, r) :- right(pos); + PSuccess(pos, r) + } + + function {:opaque} ConcatL_( + ghost size: nat, + left: Parser, + right: Parser + ) : (p: Parser) + // Same as Concat but only returns the first result + ensures forall pos: nat | + ConcatSpec_(size, left, right, pos) + :: p.requires(pos) + && (p(pos).PSuccess? ==> pos <= p(pos).pos <= size) + { + (pos: nat) + requires left.requires(pos) + requires left(pos).PSuccess? ==> right.requires(left(pos).pos) + => + var (pos, l) :- left(pos); + var (pos, r) :- right(pos); + PSuccess(pos, l) + } + + ghost predicate RepeatSpec(underlying: Parser, pos: nat, remaining: nat, size: nat) + // Encodes the proposition that, for all position between pos and size included, + // the underlying parser should be able to parse there, and if it succeeds, then + // its output position should be greater than the original parsing position, + // but bounded by size + { + && pos + remaining == size + && (forall pos' | pos <= pos' <= size :: + && underlying.requires(pos') + && (underlying(pos').PSuccess? ==> pos' <= underlying(pos').pos <= size)) + } + + function {:opaque} Repeat_( + ghost remaining: nat, + ghost size: nat, + underlying: Parser + ): (p: Parser>) + // Given a parser on a string of size 'size' and with 'remaining' characters to parse + // (typically obtained by size - pos), returns a parser that can repeatedly parse the string + // This parser returns a sequence of all possible parsers + // If the underlying parser fails, then an empty sequence is returned + ensures forall pos: nat | RepeatSpec(underlying, pos, remaining, size) :: + && p.requires(pos) + && (p(pos).PSuccess? ==> pos <= p(pos).pos <= size) + && (p(pos).PFailure? ==> p(pos).level != Recoverable) + { + (pos: nat) requires RepeatSpec(underlying, pos, remaining, size) => + match underlying(pos) + case PSuccess(pos', head) => + if pos' <= pos then PSuccess(pos', [head]) else + match Repeat_(remaining-(pos'-pos), size, underlying)(pos') { + case PSuccess(pos'', tail) => + PSuccess(pos'', [head] + tail) + case PFailure(Error, message, pos'') => + PFailure(Error, message, pos') + } + case PFailure(Error, message, pos') => + PFailure(Error, message, pos') + case PFailure(Recoverable, message, pos') => + PSuccess(pos, []) + } + + function {:opaque} {:tailrecursion true} Repeat0( + underlying: Parser, + pos: nat, + acc: seq, + ghost size: nat + ): (p: ParseResult>) + // Alternative version of Repeat that does not return a parser, but directly the ParseResult + decreases size - pos + requires pos <= size + //requires forall pos: nat :: RepeatSpec(underlying, pos, remaining, size) + requires RepeatSpec(underlying, pos, size-pos, size) + ensures p.PFailure? ==> p.level != Recoverable + ensures p.PSuccess? ==> pos <= p.pos <= size + { + match underlying(pos) + case PSuccess(pos', head) => + if pos' <= pos then PSuccess(pos', acc + [head]) else + Repeat0(underlying, pos', acc + [head], size) + case PFailure(Error, message, pos') => + PFailure(Error, message, pos') + case PFailure(Recoverable, message, pos') => + PSuccess(pos, acc) + } + + lemma Repeat0DoesIncreasePosSometimes(underlying: Parser, pos: nat, size: nat) + requires pos <= size + requires RepeatSpec(underlying, pos, size-pos, size) + requires underlying.requires(pos) && underlying(pos).PSuccess? && pos < underlying(pos).pos + ensures + var result := Repeat0(underlying, pos, [], size); + result.PSuccess? && 0 < |result.t| ==> pos < result.pos + { + reveal Repeat0(); + } + + + function {:opaque} RepeatAcc_( + underlying: Parser, + pos: nat, + init: I, + combine: (I, T) -> I, + ghost size: nat + ): (p: ParseResult) + // Alternative version of Repeat that does not return a parser, but directly the ParseResult + decreases size - pos + requires pos <= size + //requires forall pos: nat :: RepeatSpec(underlying, pos, remaining, size) + requires RepeatSpec(underlying, pos, size-pos, size) + ensures p.PFailure? ==> p.level != Recoverable + ensures p.PSuccess? ==> pos <= p.pos <= size + { + match underlying(pos) + case PSuccess(pos', head) => + if pos' <= pos then PSuccess(pos', combine(init, head)) else + match RepeatAcc_(underlying, pos', combine(init, head), combine, size) { + case PSuccess(pos'', tail) => + PSuccess(pos'', tail) + case PFailure(Error, message, pos'') => + PFailure(Error, message, pos') + } + case PFailure(Error, message, pos') => + PFailure(Error, message, pos') + case PFailure(Recoverable, message, pos') => + PSuccess(pos, init) + } + + ghost predicate FixSpecInner(size: nat, callback: Parser, u: nat) + // Specificaiton for Fixpoint. In other terms, verifies that the callback parser + // accepts all positions between u (exclusive) and size (inclusive) + { + && u <= size + && forall u': nat | u < u' <= size :: + callback.requires(u') + && (callback(u').PSuccess? ==> u' <= callback(u').pos <= size) + } + + function {:opaque} Fixpoint_( + ghost size: nat, + underlying: (Parser, nat) --> ParseResult, + pos: nat + ): (p: ParseResult) + // Given a function that combines a (recursive) parser and a position to obtain a parse result, + // returns the parse result associated to recursively applying the function. + // If partially applied on "underlying" and "size", it would returns the solution to the equation: + // f = pos => underlying(f, pos) + decreases size - pos + requires pos <= size + requires + forall callback: Parser, u: nat | FixSpecInner(size, callback, u) :: + && underlying.requires(callback, u) + && (underlying(callback, u).PSuccess? ==> u <= underlying(callback, u).pos <= size) + ensures p.PSuccess? ==> pos <= p.pos <= size + { + var callback: Parser := (pos': nat) requires pos < pos' <= size => + Fixpoint_(size, underlying, pos'); + underlying(callback, pos) + } + + ghost predicate FixMapSpecInner(fun: string, functions: set, size: nat, callback: string --> nat --> ParseResult, u: nat) + // Specification for FixpointMap. + // Ensures that, for any other function, if this function is in the set of admissible `functions`, + // then callback should not only accept it, but then accept any position at a second argument if + // 1) This position is strictly greater than the current position u + // 2) Or this position is the same but the function name is smaller. + { + && u <= size + && forall fun': string <- functions :: + && callback.requires(fun') + && forall u': nat | u < u' <= size || (u == u' && |fun'| < |fun|) :: + callback(fun').requires(u') + && var x := callback(fun')(u'); + && (x.PSuccess? ==> u' <= x.pos <= size) + } + + ghost predicate FixpointMapSpecOnce(fun': string, impl: ParserMapper, otherFuns: set, size: nat) + { + forall callback: ParserSelector, u: nat | + && FixMapSpecInner(fun', otherFuns, size, callback, u) + :: impl.requires(callback, u) + && var x := impl(callback, u); + && (x.PSuccess? ==> u <= x.pos <= size) + } + + function {:opaque} FixpointMap_( + ghost size: nat, + underlying: map Parser, nat) --> ParseResult>, + fun: string, + pos: nat + ): (p: ParseResult) + // Given a function that combines a (recursive) parser selector and a position to obtain a parse result, + // returns the parse result associated to recursively applying the function. + // If partially applied on "underlying" and "fun", it would return the solution f<"fun"> to the equations: + // f = pos => underlying[fun](f, pos) + decreases size - pos, |fun| + requires pos <= size + requires + && fun in underlying.Keys + && forall fun': string <- underlying.Keys :: + FixpointMapSpecOnce(fun', underlying[fun'], underlying.Keys, size) + ensures p.PSuccess? ==> pos <= p.pos <= size + { + var callback: string --> nat --> ParseResult + := (fun': string) requires fun' in underlying.Keys => + (pos': nat) requires pos < pos' <= size || (pos' == pos && |fun'| < |fun|) => + FixpointMap_(size, underlying, fun', pos'); + underlying[fun](callback, pos) + } + + ghost predicate OrSpec( + size: nat, + left: Parser, + right: Parser, + pos: nat) + // Verifies that the two parsers can both be applied at the given position + { + && left.requires(pos) + && (left(pos).PSuccess? ==> pos <= left(pos).pos <= size) + && right.requires(pos) + && (right(pos).PSuccess? ==> pos <= right(pos).pos <= size) + // Since requires are only used for progression, I don't have a use case in which the following is useful + /*&& var l := left(pos); + && (l.PFailure? && l.level == Recoverable ==> right.requires(pos))*/ + } + + function {:opaque} Or_( + ghost size: nat, + left: Parser, + right: Parser + ) : (p: Parser) + // Builds a parser from left and right such that, if left fails and is recoverable, then right is used instead. + ensures forall pos: nat | OrSpec(size, left, right, pos) + :: p.requires(pos) + && (p(pos).PSuccess? ==> pos <= p(pos).pos <= size) + { + (pos: nat) requires OrSpec(size, left, right, pos) + => + match left(pos) + case PFailure(Recoverable, message, pos') => right(pos) + case PFailure(Error, message, pos') => PFailure(Error, message, pos') + case PSuccess(pos, l) => PSuccess(pos, l) + } + + ghost predicate EitherPSpec(size: nat, left: Parser, right: Parser, pos: nat) { + && left.requires(pos) + && (left(pos).PSuccess? ==> pos <= left(pos).pos <= size) + && (left(pos).PFailure? && left(pos).level == Recoverable ==> + right.requires(pos) + && (right(pos).PSuccess? ==> pos <= right(pos).pos <= size)) + } + + function {:opaque} EitherP_( + ghost size: nat, + left: Parser, + right: Parser + ) : (p: Parser>) + // Same as 'Or' but supports returning two heterogeneous values + ensures forall pos: nat | + EitherPSpec(size, left, right, pos) + :: p.requires(pos) + && (p(pos).PSuccess? ==> pos <= p(pos).pos <= size) + { + (pos: nat) + requires EitherPSpec(size, left, right, pos) + => + match left(pos) + case PFailure(Recoverable, message, pos') => right(pos).MapResult(r => Right(r)) + case PFailure(Error, message, pos') => PFailure(Error, message, pos') + case PSuccess(pos, l) => PSuccess(pos, Left(l)) + } +} + +module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserTheorems { + import opened Parsers + // The remaining are interesting proofs about some equivalence but are not useful + + function {:opaque} Map__(ghost size: nat, underlying: Parser, mappingFunc: T --> U) + : (p: Parser) + // Map is equivalent to a bind method: + ensures forall pos: nat | MapSpec(size, underlying, mappingFunc, pos) + :: p.requires(pos) + { + var p := Bind_(size, underlying, (t: T, pos': nat) requires mappingFunc.requires(t) => Succeed_(size, mappingFunc(t))); + assert forall pos: nat | MapSpec(size, underlying, mappingFunc, pos) :: + p.requires(pos) by { + forall pos: nat | MapSpec(size, underlying, mappingFunc, pos) + ensures p.requires(pos) + { + var left := underlying; + var right := (t: T, pos': nat) requires mappingFunc.requires(t) => Succeed_(size, mappingFunc(t)); + assert BindSpec(size, left, right, pos); + } + } + p + } + + lemma Map_Map2(size: nat, underlying: Parser, mappingFunc: T --> U, pos: nat) + requires MapSpec(size, underlying, mappingFunc, pos) + ensures + && Map__(size, underlying, mappingFunc)(pos) == Map_(size, underlying, mappingFunc)(pos) + { + reveal Map_(); + reveal Map__(); + reveal Bind_(); + } + + function {:opaque} Concat__(ghost size: nat, left: Parser, right: Parser) + : (p: Parser<(T, U)>) + // Concat is equivalent to two binds methods + ensures forall pos: nat | ConcatSpec_(size, left, right, pos) + :: p.requires(pos) + { + Bind_(size, left, (t: T, pos': nat) requires right.requires(pos') => + Bind_(size, right, (u: U, pos'': nat) => Succeed_(size, (t, u)))) + } + + lemma Concat_Concat2(size: nat, left: Parser, right: Parser, pos: nat) + requires ConcatSpec_(size, left, right, pos) + ensures BindSpec(size, left, (t: T, pos': nat) requires right.requires(pos') => + Bind_(size, right, (u: U, pos'': nat) => Succeed_(size, (t, u))), pos) + // TODO: Bug to report. Concat_() should not be needed + ensures Concat_(size, left, right)(pos) == Concat__(size, left, right)(pos) + { + reveal Bind_(); + reveal Concat_(); + reveal Concat__(); + } +} + +// Nice DSL to build parsers +// B(p) returns a parser builder from a normal parser. +// B1.o_I(B2) will parse both but return the result of B2 +// B1.I_o(B2) will parse both but return the result of B1 +// B.M(f) will map the result of the parser builder by f if succeeded +// B1.O(B2) will either parse B1, or B2 if B1 fails with Recoverable +// FirstOf([B1, B2, B3]) +// will parse with B1, but if B1 fails with Recoverable, +// it will parse with B2, but if B2 fails with Recoverable, +// it will parse with B3 +// R(v) returns a parser builder that returns immediately v +// +// There are more parser builders in the trait Engine, when their spec depends on +// a predetermined input, e.g. to tests for constant strings + +module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserBuilders { + import opened Parsers + //import opened ParserEngine + + // Wrap the constructor in a class where the size is constant so that users + // don't need to provide it. + datatype ParserBuilder = B_(ghost size: nat, apply: Parser) + { + function {:opaque} o_I(other: ParserBuilder): (p: ParserBuilder) + requires size == other.size + ensures p.size == size + ensures forall pos: nat | + ConcatSpec_(size, apply, other.apply, pos) + :: p.apply.requires(pos) + && (p.apply(pos).PSuccess? ==> pos <= p.apply(pos).pos <= size) + { + B_(size, ConcatR_(size, apply, other.apply)) + } + function {:opaque} I_o(other: ParserBuilder): (p: ParserBuilder) + requires size == other.size + ensures p.size == size + ensures forall pos: nat | + ConcatSpec_(size, apply, other.apply, pos) + :: p.apply.requires(pos) + && (p.apply(pos).PSuccess? ==> pos <= p.apply(pos).pos <= size) + { + B_(size, ConcatL_(size, apply, other.apply)) + } + function {:opaque} M(mappingFunc: T --> U): (p: ParserBuilder) + ensures p.size == size + ensures forall pos: nat | + MapSpec(size, apply, mappingFunc, pos) + :: p.apply.requires(pos) + && (p.apply(pos).PSuccess? ==> pos <= p.apply(pos).pos <= size) + { + B_(size, Map_(size, apply, mappingFunc)) + } + function {:opaque} O(other: ParserBuilder): (p: ParserBuilder) + requires size == other.size + ensures size == p.size + ensures forall pos: nat | + OrSpec(size, apply, other.apply, pos) + :: p.apply.requires(pos) + && (p.apply(pos).PSuccess? ==> pos <= p.apply(pos).pos <= size) + { + B_(size, Or_(size, apply, other.apply)) + } + + function {:opaque} Then(other: (T, nat) --> ParserBuilder): (p: ParserBuilder) + ensures size == p.size + ensures forall pos: nat | + BindSpec(size, apply, (t: T, pos': nat) requires other.requires(t, pos') => other(t, pos').apply, pos) + :: p.apply.requires(pos) + && ( + p.apply(pos).PSuccess? ==> + && apply(pos).PSuccess? + && apply(pos).pos <= p.apply(pos).pos <= size) + { + B_(size, Bind_(size, apply, (t: T, pos': nat) requires other.requires(t, pos') => other(t, pos').apply)) + } + + function {:opaque} Repeat(init: T, combine: (T, T) -> T): (p: ParserBuilder) + ensures size == p.size + ensures forall pos: nat | pos <= size && RepeatSpec(apply, pos, size-pos, size) :: + p.apply.requires(pos) + && (p.apply(pos).PSuccess? ==> pos <= p.apply(pos).pos <= size) + + { + B_(size, + (pos: nat) + requires pos <= size + requires RepeatSpec(apply, pos, size-pos, size) + => RepeatAcc_(apply, pos, init, combine, size)) + } + } + function {:opaque} FirstOf_(ghost size: nat, others: seq>): (p: ParserBuilder) + requires |others| > 0 + requires forall other <- others :: other.size == size + ensures p.size == size + ensures + forall pos: nat | + forall pp | pp in others :: pp.apply.requires(pos) && (pp.apply(pos).PSuccess? ==> pos <= pp.apply(pos).pos <= size) + :: + p.apply.requires(pos) && (p.apply(pos).PSuccess? ==> pos <= p.apply(pos).pos <= size) + { + if |others| == 1 then others[0] + else + var p := others[0].O(FirstOf_(size, others[1..])); + assert forall pos: nat | + forall pp | pp in others :: pp.apply.requires(pos) && (pp.apply(pos).PSuccess? ==> pos <= pp.apply(pos).pos <= size) + :: + p.apply.requires(pos) && (p.apply(pos).PSuccess? ==> pos <= p.apply(pos).pos <= size) by { + } + p + } + function R_(ghost size: nat, t: T): (p: ParserBuilder) + { + B_(size, Succeed_(size, t)) + } + datatype FixMapParserBuilder = FixMapParserBuilder(ghost size: nat, ghost functions: set, underlying: map> := map[]) + { + static function Empty(ghost size: nat, ghost functions: set): (b: FixMapParserBuilder) ensures b.Valid() { + FixMapParserBuilder(size, functions, map[]) + } + ghost predicate Valid() { + forall fun <- underlying :: FixpointMapSpecOnce(fun, underlying[fun], functions, size) + } + function {:opaque} Add(name: string, mapper: ParserMapper): (f: FixMapParserBuilder) + requires Valid() + requires name !in underlying + requires FixpointMapSpecOnce(name, mapper, functions, size) + ensures f.Valid() + ensures f.functions == functions + ensures f.size == size + ensures name in f.underlying + ensures this.underlying.Keys + {name} == f.underlying.Keys + { + this.(underlying := underlying[name := mapper]) + } + } +} + +// Defines an "Engine" trait to be extended, which gives access to more +// parser combinators and parser builders that require access to an input string +module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { + import opened ParserBuilders + import opened Parsers + import opened Library + import opened Printer + + // Engine defines the following parsers: + // Succeed(v) Always succeeds with the given value + // Bind(l, r) If l succeeds, returns the result of running r on the result + // Epsilon Always succeeds and returns () + // Map(p, f) If p succeeds, maps its result with f + // Concat(l, r) if l and r succeed consecutively, pair their results + // ConcatL(l, r) if l and r succeed consecutively, returns the value of l + // ConcatR(l, r) if l and r succeed consecutively, returns the value of r + // Or(l, r) Returns the first of l or r which succeeds + // EitherP(l, r) Returns the first of l or r which succeeds, wrapped in Either type + // Char('c') fails with Error if 'c' is not at the given position. + // Char?('c') fails with Recoverable if 'c' is not at the given position. + // Const("string") fails with Error if "string" is not at the given position. + // Const?("string") fails with Recoverable if "string" is not at the given position. + // Rep(parser) repeats the parser as much as possible and returns the sequence of results + // Fix((result, pos) => parseResult) returns a parser that recursively applies the provided function when needed + // FixMap((result, pos) => parseResult) Same as fix but can provide a mapping from string to functions instead of a single function + // EOS Succeeds if we reached the end of the string, fails with Error otherwise + // FirstOf([p*]) returns the first parser which succeeds + // Maybe(p) If p succeeds, returns Some(p.result), otherwise returns None if p fails with Recoverable + // DebugParser(msg, p) Prints the given message and pass through p + // + // Engine defines the following parser builders (please avoid them because they are not performant!) + // C("string") fails with Error if "string" is not at the given position. + // C?("string") fails with Recoverable if "string" is not at the given position. + // B(p) wraps a regular parser + // R(v) A parser builder that returns the given value + trait {:termination false} Engine { + const input: string + + function {:opaque} Succeed(t: T): (p: Parser) + ensures forall pos: nat | pos <= |input| :: p.requires(pos) && p(pos).PSuccess? && pos == p(pos).pos <= |input| + // A parser that does not consume any input and returns the given value + { + (pos: nat) requires pos <= |input| => PSuccess(pos, t) + } + + function {:opaque} Bind( + left: Parser, + right: (L, nat) --> Parser + ) : (p: Parser) + // A parser such that, if the left parser succeeds, then the right parser is obtained by using the result of the left parser + ensures forall pos: nat | BindSpec(|input|, left, right, pos) + :: p.requires(pos) + && (p(pos).PSuccess? ==> + && left(pos).PSuccess? + && left(pos).pos <= p(pos).pos <= |input| + && p(pos) == right(left(pos).t, left(pos).pos)(left(pos).pos)) + { + Bind_(|input|, left, right) + } + + function {:opaque} Epsilon(pos: nat): (pr: ParseResult<()>) + requires pos <= |input| + ensures pr.PSuccess? && pr.pos == pos + { + Epsilon_(|input|)(pos) + } + + function {:opaque} Map(underlying: Parser, mappingFunc: T --> U) + : (p: Parser) + // A parser combinator that makes it possible to transform the result of a parser in another one + // The mapping function can be partial + ensures forall pos: nat | MapSpec(|input|, underlying, mappingFunc, pos) + :: && p.requires(pos) + && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) + { + Map_(|input|, underlying, mappingFunc) + } + + ghost predicate MapFailureSpec(pos: nat, underlying: Parser) { + pos <= |input| && underlying.requires(pos) + } + + function {:opaque} MapFailure(underlying: Parser, mappingFunc: ParseResult --> ParseResult) + : (p: Parser) + requires forall p: ParseResult | p.PFailure? :: mappingFunc.requires(p) && mappingFunc(p).PFailure? + requires forall pos: nat | pos <= |input| :: + && underlying.requires(pos) + && (underlying(pos).PSuccess? ==> pos <= underlying(pos).pos <= |input|) + ensures forall pos: nat | MapFailureSpec(pos, underlying) :: + && p.requires(pos) + //&& (p(pos).PSuccess? <==> underlying(pos).PSuccess?) + && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) + { + (pos: nat) requires pos <= |input| => + var r := underlying(pos); + if r.PSuccess? then r else + mappingFunc(r) + } + + ghost predicate ConcatSpec(left: Parser, right: Parser, pos: nat) { + ConcatSpec_(|input|, left, right, pos) + } + + function {:opaque} Concat( + left: Parser, + right: Parser + ) : (p: Parser<(L, R)>) + // Makes it possible to concatenate two consecutive parsers and return the pair of the results + ensures forall pos: nat | + ConcatSpec_(|input|, left, right, pos) + :: p.requires(pos) + && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) + { + Concat_(|input|, left, right) + } + + function {:opaque} ConcatR( + left: Parser, + right: Parser + ) : (p: Parser) + // Makes it possible to concatenate two consecutive parsers and return the pair of the results + ensures forall pos: nat | + ConcatSpec_(|input|, left, right, pos) + :: p.requires(pos) + && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) + { + ConcatR_(|input|, left, right) + } + + function {:opaque} ConcatL( + left: Parser, + right: Parser + ) : (p: Parser) + // Makes it possible to concatenate two consecutive parsers and return the pair of the results + ensures forall pos: nat | + ConcatSpec_(|input|, left, right, pos) + :: p.requires(pos) + && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) + { + ConcatL_(|input|, left, right) + } + + function {:opaque} Or( + left: Parser, + right: Parser + ) : (p: Parser) + // Builds a parser from left and right such that, if left fails and is recoverable, then right is used instead. + ensures forall pos: nat | + OrSpec(|input|, left, right, pos) + :: p.requires(pos) + && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) + { + Or_(|input|, left, right) + } + + function {:opaque} EitherP( + left: Parser, + right: Parser + ) : (p: Parser>) + // Same as 'Or' but supports returning two heterogeneous values + ensures forall pos: nat | + EitherPSpec(|input|, left, right, pos) + :: p.requires(pos) + && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) + { + EitherP_(|input|, left, right) + } + + function {:opaque} Char(c: char): (p: Parser) + ensures forall pos: nat :: p.requires(pos) + ensures forall pos: nat :: p(pos).PSuccess? ==> + pos < |input| && p(pos).pos == pos + 1 + { + (pos: nat) => + if pos < |input| && input[pos] == c then PSuccess(pos + 1, c) + else PFailure(Error, "Expected '"+[c]+"'", pos) + } + + function {:opaque} Char?(c: char): (p: Parser) + ensures forall pos: nat :: p.requires(pos) + ensures forall pos: nat :: p(pos).PSuccess? ==> + pos < |input| && p(pos).pos == pos + 1 + { + (pos: nat) => + if pos < |input| && input[pos] == c then PSuccess(pos + 1, c) + else PFailure(Recoverable, "Expected a different char but that's ok", pos) + } + + // Returns a function that tests if, at the given position, we can find the string toTest + function {:opaque} TestString(toTest: string): (test: nat --> bool) + ensures forall pos: nat | pos <= |input| :: test.requires(pos) + { + (pos: nat) requires pos <= |input| => + pos + |toTest| <= |input| && input[pos..pos+|toTest|] == toTest + } + + // Returns a function that tests if, at the given position, we can find the string toTest + function {:opaque} TestNotString(toTest: string): (test: nat --> bool) + ensures forall pos: nat | pos <= |input| :: test.requires(pos) + { + (pos: nat) requires pos <= |input| => + !(pos + |toTest| <= |input| && input[pos..pos+|toTest|] == toTest) + } + + function {:opaque} CharTest?(test: nat --> bool): (p: Parser) + requires forall pos: nat | pos < |input| :: test.requires(pos) + ensures forall pos: nat | pos <= |input| :: + p.requires(pos) && + (p(pos).PSuccess? ==> pos < |input| && pos + 1 == p(pos).pos) + { + (pos: nat) requires pos <= |input| => + if pos < |input| && test(pos) then PSuccess(pos + 1, input[pos]) + else PFailure(Recoverable, "Expected a different char but that's ok", pos) + } + + ghost predicate ConstSpec(expected: string, p: Parser) { + forall pos: nat :: + && p.requires(pos) + && (p(pos).PSuccess? ==> + pos + |expected| <= |input| && p(pos).pos == pos + |expected|) + } + + function {:opaque} C(expected: string): (p: ParserBuilder) + ensures p.size == |input| + ensures ConstSpec(expected, p.apply) + { + B_(|input|, Const(expected)) + } + function {:opaque} C?(expected: string): (p: ParserBuilder) + ensures p.size == |input| + ensures ConstSpec(expected, p.apply) + { + B_(|input|, Const?(expected)) + } + function B(underlying: Parser): (p: ParserBuilder) + ensures p.size == |input| + { + B_(|input|, underlying) + } + + function {:opaque} Const(expected: string): (p: Parser) + ensures ConstSpec(expected, p) + { + (pos: nat) => + if pos + |expected| <= |input| && input[pos..pos + |expected|] == expected then PSuccess(pos + |expected|, expected) + else PFailure(Error, "Expected '"+expected+"'", pos) + } + + function {:opaque} Const?(expected: string): (p: Parser) + ensures ConstSpec(expected, p) + { + (pos: nat) => + if pos + |expected| <= |input| && input[pos..pos + |expected|] == expected then PSuccess(pos + |expected|, expected) + else PFailure(Recoverable, "Possibly expecting something else but that's ok", pos) + } + + function {:opaque} Maybe(underlying: Parser): (p: Parser>) + requires IsRegular(underlying) + ensures IsRegular(p) + { + Or(Map(underlying, (t: T) => Some(t)), Succeed(None)) + } + + function {:opaque} Newline(): (p: Parser) + ensures IsRegular(p) + { + Or(Const?("\r\n"), Or(Const?("\r"), Const("\n"))) + } + + function {:opaque} Test?(test: (string, nat) --> bool): (p: Parser<()>) + requires forall pos: nat | pos <= |input| :: test.requires(input, pos) + ensures forall pos: nat | pos <= |input| :: + p.requires(pos) && + (p(pos).PSuccess? ==> pos <= p(pos).pos) + { + (pos: nat) requires pos <= |input| => + if test(input, pos) then PSuccess(pos, ()) else PFailure(Recoverable, "Test failed", pos) + } + + // Given a test on a position, returns a parser that succeeds with the longest string + // starting at a given position which succeeds the test on every character + // If the test succeeds immediately, returns a recoverable failure instead + function {:opaque} While?(test: nat --> bool): (p: Parser) + requires forall pos: nat | pos <= |input| :: test.requires(pos) + ensures forall pos: nat | pos <= |input| :: p.requires(pos) + && (p(pos).PSuccess? ==> pos < p(pos).pos <= |input|) + { + var p := Bind(Rep(CharTest?(test)), + (result: string, pos': nat) => + if result == "" then Fail("Did not find an non-empty string satisfying test", Recoverable) + else Succeed(result)); + assert forall pos: nat | pos <= |input| :: p.requires(pos) + && (p(pos).PSuccess? ==> pos < p(pos).pos <= |input|) by { + forall pos : nat | pos <= |input| ensures p.requires(pos) + && (p(pos).PSuccess? ==> pos < p(pos).pos <= |input|){ + assert p.requires(pos); + if(p(pos).PSuccess?) { + RepDoesIncreasePosSometimes(CharTest?(test), pos); + } + } + } + p + } + + function {:opaque} EverythingUntilAndDrop(str: string): (p: Parser) + ensures forall pos: nat | pos <= |input| :: + p.requires(pos) + && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) + { + ConcatL(While?(TestNotString(str)), Const(str)) + } + + + ghost predicate RepSpec(underlying: Parser, pos: nat) { + && pos <= |input| + && (forall pos' | pos <= pos' <= |input| :: + && underlying.requires(pos') + && (underlying(pos').PSuccess? ==> pos' <= underlying(pos').pos <= |input|)) + } + + function {:opaque} Rep(underlying: Parser): (p: Parser>) + ensures forall pos: nat | RepSpec(underlying, pos) :: + p.requires(pos) + && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) + { + (pos: nat) + requires RepSpec(underlying, pos) + => + Repeat0(underlying, pos, [], |input|) + } + + lemma RepDoesIncreasePosSometimes(underlying: Parser, pos: nat) + requires pos <= |input| && RepeatSpec(underlying, pos, |input|-pos, |input|) + requires underlying.requires(pos) && underlying(pos).PSuccess? ==> + pos < underlying(pos).pos + ensures + var p := Rep(underlying); + (p(pos).PSuccess? && |p(pos).t| > 0 ==> pos < p(pos).pos) + { + reveal Rep(); + reveal Repeat0(); + } + + function {:opaque} RepAcc( + underlying: Parser, + init: I, + combine: (I, T) -> I + ): (p: Parser) + ensures forall pos: nat | RepSpec(underlying, pos) :: + && p.requires(pos) + && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) + { + (pos: nat) requires RepSpec(underlying, pos) => + RepeatAcc_(underlying, pos, init, combine, |input|) + } + + ghost predicate FixSpec(size: nat, underlying: (Parser, nat) --> ParseResult, pos: nat) { + && pos <= size + && forall callback: Parser, u: nat | + FixSpecInner(size, callback, u) :: + underlying.requires(callback, u) + && (underlying(callback, u).PSuccess? ==> u <= underlying(callback, u).pos <= size) + } + function GetFixBase(): map> { map[] } + + function {:opaque} Fix(underlying: (Parser, nat) --> ParseResult): (p: Parser) + ensures forall pos: nat| FixSpec(|input|, underlying, pos) + :: p.requires(pos) + && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) + { + (pos: nat) + requires FixSpec(|input|, underlying, pos) + => + Fixpoint_( + |input|, + underlying, + pos + ) + } + // TODO: Dafny clinic. Cannot make it opaque, otherwise + // even with reveal cannot verify other functions + ghost predicate FixMapInnerOnce(fun: string, impl: ParserMapper, otherFuns: set, size: nat) + { + FixpointMapSpecOnce(fun, impl, otherFuns, |input|) + } + + ghost predicate FixMapInner(size: nat, underlying: map>) { + forall fun: string <- underlying.Keys :: + FixMapInnerOnce(fun, underlying[fun], underlying.Keys, size) + } + + function {:opaque} FixMap( + underlying: map>, + fun: string + ): (p: Parser) + requires {:error "Second argument of FixMap might not be a key of the first"} fun in underlying.Keys + requires {:error "First argument might not satisfy FixMapInner(|input|, arg). Said otherwise, key/value pairs of the first argument might not all satisfy FixMapInnerOnce(key, value, keys, |input|)"} FixMapInner(|input|, underlying) + ensures forall pos: nat | pos <= |input| :: + && p.requires(pos) + && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) + { + (pos: nat) requires pos <= |input| => + //reveal FixMapInnerOnce(); + FixpointMap_( + |input|, + underlying, + fun, + pos + ) + } + function {:opaque} FixMapBuilder(ghost functions: set): (r: FixMapParserBuilder) + ensures r.Valid() + ensures |r.underlying.Keys| == 0 + ensures r.functions == functions + ensures r.size == |input| + { + var underlying: map> := map[]; + FixMapParserBuilder(|input|, functions, underlying) + } + + const EOS: Parser<()> := + (pos: nat) => if pos >= |input| then PSuccess(pos, ()) else PFailure(Error, "Expected end of string", pos) + + function {:opaque} FirstOf(others: seq>): (p: ParserBuilder) + requires |others| > 0 + requires forall other <- others :: other.size == |input| + ensures + forall pos: nat | + forall pp <- others :: + pp.apply.requires(pos) && (pp.apply(pos).PSuccess? ==> pos <= pp.apply(pos).pos <= |input|) + :: + p.apply.requires(pos) && (p.apply(pos).PSuccess? ==> pos <= p.apply(pos).pos <= |input|) + { + FirstOf_(|input|, others) + } + + function R(t: T): (p: ParserBuilder) + { + B(Succeed(t)) + } + + const natToDigit: seq := "0123456789" + const digitToNat: map := map i | 0 <= i < |natToDigit| :: natToDigit[i] := i + + function {:opaque} Digit(test: bool := true): (p: Parser) + ensures forall pos:nat | pos <= |input| :: + && p.requires(pos) + && (p(pos).PSuccess? ==> + && pos < |input| + && input[pos] in digitToNat + && digitToNat[input[pos]] == p(pos).t + && 0 <= p(pos).t <= 9 + && p(pos).pos == pos + 1) + { + (pos: nat) requires pos <= |input| => + if pos == |input| then PFailure(if test then Recoverable else Error, "Expected a digit", pos) else + if input[pos] in digitToNat then + PSuccess(pos + 1, digitToNat[input[pos]]) + else PFailure(if test then Recoverable else Error, "Expected a digit", pos) + } + + ghost predicate RecSpec(fun: string, otherFuns: set, rec: string --> Parser, pos: nat) + { + FixMapSpecInner(fun, otherFuns, |input|, rec, pos) + } + + ghost predicate RecSpecOnce(fun: string, otherFuns: set, mapper: ParserMapper) { + FixMapInnerOnce(fun, mapper, otherFuns, |input|) + } + + // TODO: We have the ability to define another parser given the result of the first one, + // but I'm missing the ability to call another parser builder with the result of the first one + // to avoid callbacks. + + function {:opaque} {:vcs_split_on_every_assert} Nat(test: bool := true): (p: Parser) + ensures forall pos: nat | pos <= |input| :: + && p.requires(pos) + && (p(pos).PSuccess? ==> pos < p(pos).pos <= |input|) + { + var d? := Digit(); + Bind(d?, (firstdigit: DigitNat, pos: nat) => + RepAcc(d?, firstdigit, + (previous, next) => previous*10 + next) + ) + } + + function {:opaque} {:vcs_split_on_every_assert} N(test: bool := true): (p: ParserBuilder) + ensures p.size == |input| && + forall pos: nat | pos <= |input| :: + && p.apply.requires(pos) + && (p.apply(pos).PSuccess? ==> pos < p.apply(pos).pos <= |input|) + { + B(Nat(test)) + } + + function {:opaque} Spaces?(): (r: Parser) + ensures forall pos: nat | pos <= |input| :: + r.requires(pos) + && (r(pos).PSuccess? ==> pos <= r(pos).pos <= |input|) + { + (pos: nat) requires pos <= |input| => + if pos < |input| && input[pos] in " \n\r\t" then + PSuccess(pos+1, input[pos..pos+1]) + else + PFailure(Recoverable, "Spaces", pos) + } + + function {:opaque} SkipSpaces(p: Parser): (r: Parser) + requires forall pos: nat | pos <= |input| :: + p.requires(pos) + && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) + ensures forall pos: nat | pos <= |input| :: + r.requires(pos) + && (r(pos).PSuccess? ==> pos <= r(pos).pos <= |input|) + { + ConcatR(Spaces?(), p) + } + function LineContainingPos(pos: nat, p: nat := 0, lineNumber: nat := 0, charNumber: nat := 0, startLinePos: nat := 0): (result: (string, nat, nat)) + decreases |input| - p + ensures 0 <= result.2 <= |input| + requires 0 <= charNumber <= p + requires startLinePos <= p <= |input| + { + if p >= |input| then + assert charNumber <= |input|; + (input[startLinePos..p], lineNumber, charNumber) + else + if input[p] == '\n' || p == |input| then + if pos <= p then + (input[startLinePos..p], lineNumber, charNumber) + else + LineContainingPos(pos, p + 1, lineNumber + 1, 0, p + 1) + else + LineContainingPos(pos, p + 1, lineNumber, if p <= pos then charNumber + 1 else charNumber, startLinePos) + } + + ghost predicate IsRegular(p: Parser) { + IsRegular_(p, |input|) + } + + /*function {:opaque} Regex(s: string): (r: Parser) + ensures forall pos: nat | pos <= |input| :: + r.requires(pos) + && (r(pos).PSuccess? ==> pos <= r(pos).pos <= |input|) + { + if s == "" then Epsilon + else + } by method { + + }*/ + + function FeedbackToString(result: ParseResult): string + requires result.PFailure? + { + var (line, lineNumber, charNumber) := LineContainingPos(result.pos); + result.message + " at position "+Printer.natToString(result.pos)+" line "+Printer.natToString(lineNumber)+", column "+Printer.natToString(charNumber)+":\n>"+ + line+"\n"+seq(charNumber, i => ' ')+"^\n" + } + method ReportError(p: ParseResult) + requires p.PFailure? + { + var (line, lineNumber, charNumber) := LineContainingPos(p.pos); + print "Parse error at position ",p.pos," line ",lineNumber,", column ",charNumber,":\n>", + line, "\n", seq(charNumber, i => ' '), "^\n", + p.message, "\n"; + return; + } + function Debug(message: string, x: T): T { + x + } by method { + print message, ":", x, "\n"; + return x; + } + + function DebugParserFail(msg: string): Parser { + (p: nat) => ParseResult.PFailure(Recoverable, "", Debug( + if p < |input| then msg + "'"+( + if input[p] == '\r' then "\\r" else if input[p] == '\n' then "\\n" else input[p..p+1]) + + "' " + FeedbackToString(PFailure(Recoverable, "", p)) + "\n" else + msg, p)) + } + + function {:opaque} DebugParser(msg: string, other: Parser): (p: Parser) + ensures IsRegular(other) ==> IsRegular(p) + { + var debugParser := DebugParserFail(msg+" (before)"); + var otherParserDebugged := (p: nat) requires other.requires(p) => Debug(msg+" (after)", other(p)); + Or(debugParser, otherParserDebugged) + } + } + class EngineTest extends Engine { + constructor() { + this.input := ""; + } + } + type DigitNat = d: nat | 0 <= d <= 9 +} \ No newline at end of file From aaf1fd7feccdb3ee93685f85843f684c55b1acbc Mon Sep 17 00:00:00 2001 From: Mikael Mayer Date: Fri, 10 Nov 2023 11:36:01 -0600 Subject: [PATCH 02/22] Started better refactoring --- src/Parsers/library.dfy | 19 - src/Parsers/parser.dfy | 1016 ++++++++++++++++++++++++--------------- 2 files changed, 627 insertions(+), 408 deletions(-) diff --git a/src/Parsers/library.dfy b/src/Parsers/library.dfy index b0b765e9..ca73b77d 100644 --- a/src/Parsers/library.dfy +++ b/src/Parsers/library.dfy @@ -1,22 +1,3 @@ -module {:options "/functionSyntax:4"} Library { - // Library - datatype Option = Some(value: T) | None - datatype Result = Success(value: T) | Failure(s: string, pos: int) { - predicate IsFailure() { - Failure? - } - function PropagateFailure(): Result - requires IsFailure() - { - Failure(s, pos) - } - function Extract(): T - requires !IsFailure() - { - value - } - } -} module {:options "/functionSyntax:4"} Printer { type stringNat = s: string | diff --git a/src/Parsers/parser.dfy b/src/Parsers/parser.dfy index b1a5a3d5..396bedb5 100644 --- a/src/Parsers/parser.dfy +++ b/src/Parsers/parser.dfy @@ -1,403 +1,637 @@ +include "../Wrappers.dfy" include "library.dfy" -module {:options "-functionSyntax:4", "-quantifierSyntax:4"} Parsers { - datatype Either<+L, +R> = Left(l: L) | Right(r: R) - // Type to return when using the Or parser - - datatype FailureLevel = Error | Recoverable - // PFailure level for parse results. An Error will be propagated to the top - // while a Recoverable can be caught by a disjunctive pattern. - // For example, if the parser Const?() fails, then it returns a Recoverable, - // but the parser Const() will return an error. - - datatype ParseResult<+T> = - // ParseResult is a failure-compatible type - | PFailure(level: FailureLevel, message: string, pos: nat) - // Returned in a parser failed - | PSuccess(pos: nat, t: T) - // Returns if a parser succeeds +// Functional parsers are consuming the string from the left to the right. +abstract module Parsers { + import Wrappers + import Printer + type Option = Wrappers.Option + + type C(!new) // The character of the sequence being parsed + + datatype Either<+L, +R> = + // Type to return when using the Or parser + Left(l: L) | Right(r: R) + + + datatype FailureLevel = + // PFailure level for parse results. An Error will be propagated to the top + // while a Recoverable can be caught by a disjunctive pattern. + // For example, if the parser Const?() fails, then it returns a Recoverable, + // but the parser Const() will return an error. + Error | Recoverable + + datatype ParseResult<+R> = + // ParseResult is a failure-compatible type + | PFailure(level: FailureLevel, message: string, remaining: seq) + // Returned if a parser failed + | PSuccess(result: R, remaining: seq) + // Returned if a parser succeeds, with the increment in the position { - predicate IsFailure() { PFailure? } - function PropagateFailure(): ParseResult requires IsFailure() { - match this - case PFailure(level, message, position) => PFailure(level, message, position) + predicate IsFailure() { + PFailure? } - function Extract(): (nat, T) requires PSuccess? { (pos, t) } - function MapResult(f: T -> T'): ParseResult { - match this - case PSuccess(n, t) => PSuccess(n, f(t)) - case PFailure(level, message, position) => PFailure(level, message, position) - } - } + function PropagateFailure(): ParseResult + requires IsFailure() + { + PFailure(level, message, remaining) + } - type Parser<+T> = nat --> ParseResult - // A parser is a function from a position to a parse result. - // It might have preconditions + function Extract(): (R, seq) + requires !IsFailure() + { + (result, remaining) + } - // I wanted to use the following to make Parser a subset type - // but this causes issues in verification, so I'm inlining this - // condition wherever needed - ghost predicate IsRegular_(p: Parser, size: nat) { - forall pos: nat | pos <= size :: - && p.requires(pos) - && (p(pos).PSuccess? ==> pos <= p(pos).pos <= size) + function MapResult(f: R -> R'): ParseResult { + match this + case PSuccess(result, remaining) => + PSuccess(f(result), remaining) + case PFailure(level, message, remaining) => + PFailure(level, message, remaining) + } } + type Parser<+R> = seq -> ParseResult + // A parser is a total function from a position to a parse result + // Because it returns a delta pos, it cannot return a position negative from the origing + // If the parsing is out of context, it will return a failure. - type ParserSelector<+T> = string --> Parser + type ParserSelector = string -> Option> // A parser selector is a function that, given a name that exists, // returns a parser associated to this name - type ParserMapper = (ParserSelector, nat) --> ParseResult + //type ParserMapper = (ParserSelector, nat) -> ParseResult // A parser mapper is the building block of recursive parser. Given a parser selector and a position, // it returns a parsing result. So it's like a parser, but it can also invoke other parsers. // Parser combinators. The following functions make it possible to create and compose parsers - function {:opaque} Succeed_(ghost size: nat, t: T): (p: Parser) - ensures forall pos: nat | pos <= size :: p.requires(pos) && p(pos).PSuccess? && pos == p(pos).pos <= size + opaque function Succeed_(result: R): (p: Parser) // A parser that does not consume any input and returns the given value + // This is a generic function, it's better to use the Succeed function on strings. { - (pos: nat) requires pos <= size => PSuccess(pos, t) + (input: seq) => PSuccess(result, input) } - function {:opaque} Epsilon_(ghost size: nat): (p: Parser<()>) - ensures forall pos: nat | pos <= size :: p.requires(pos) && p(pos).PSuccess? && pos == p(pos).pos - { Succeed_(size, ()) } + lemma AboutSucceed_(result: R, input: seq) + ensures + var p := Succeed_(result); + && p(input).PSuccess? + && p(input).remaining == input + { reveal Succeed_(); } + + opaque function Epsilon_(): (p: Parser<()>) + { Succeed_(()) } + + lemma AboutEpsilon_(input: seq) + ensures + var p := Epsilon_(); + && p(input).PSuccess? + && p(input).remaining == input + { + reveal Epsilon_(); + reveal Succeed_(); + } - function Fail(message: string, level: FailureLevel := Recoverable): Parser + opaque function Fail_(message: string, level: FailureLevel := Recoverable): Parser // A parser that does not consume any input and returns the given failure { - (pos: nat) => PFailure(level, message, pos) + (input: seq) => PFailure(level, message, input) } - ghost predicate BindSpec(size: nat, left: Parser, right: (L, nat) --> Parser, pos: nat) - // Encodes the fact that, at position "pos", left and right can be correctly concatenated + lemma AboutFail_(message: string, level: FailureLevel, input: seq) + ensures + var p := Fail_(message, level)(input); + && p.PFailure? + && p.message == message + && p.level == level + && p.remaining == input { - && left.requires(pos) - && (left(pos).PSuccess? ==> pos <= left(pos).pos <= size) - && (left(pos).PSuccess? ==> - && right.requires(left(pos).t, left(pos).pos) - && var right' := right(left(pos).t, left(pos).pos); - && right'.requires(left(pos).pos) - && (right'(left(pos).pos).PSuccess? ==> left(pos).pos <= right'(left(pos).pos).pos <= size) - ) + reveal Fail_(); } - function {:opaque} Bind_( - ghost size: nat, + lemma AboutFail_2(message: string, input: seq) + ensures + var p := Fail_(message)(input); + && p.PFailure? + && p.message == message + && p.level == Recoverable + && p.remaining == input + { + reveal Fail_(); + } + + opaque function Bind_( left: Parser, - right: (L, nat) --> Parser + right: (L, seq) -> Parser ) : (p: Parser) - // A parser such that, if the left parser succeeds, then the right parser is obtained by using the result of the left parser - ensures forall pos: nat | BindSpec(size, left, right, pos) - :: p.requires(pos) - && (p(pos).PSuccess? ==> - && left(pos).PSuccess? - && left(pos).pos <= p(pos).pos <= size - && p(pos) == right(left(pos).t, left(pos).pos)(left(pos).pos)) { - (pos: nat) - requires BindSpec(size, left, right, pos) + (input: seq) => - var (pos, l) :- left(pos); - var (pos, r) :- right(l, pos)(pos); - PSuccess(pos, r) + var (leftResult, remaining) :- left(input); + right(leftResult, remaining)(remaining) } - ghost predicate MapSpec(size: nat, underlying: Parser, mappingFunc: T --> U, pos: nat) - // Encodes the proposition that, if the underlying function successfully parses at position 'pos', - // then the mapping function should succeed on the result of the parsing + lemma AboutBind_( + left: Parser, + right: (L, seq) -> Parser, + input: seq + ) + ensures + var p := Bind_(left, right)(input); + && var leftResult := left(input); + && !leftResult.IsFailure() + ==> var leftValues := left(input).Extract(); + && var rightResult := right(leftValues.0, leftValues.1)(leftValues.1); + && !rightResult.IsFailure() + ==> && !p.IsFailure() + && p.remaining == rightResult.remaining + && p.result == rightResult.result { - && underlying.requires(pos) - && (underlying(pos).PSuccess? ==> pos <= underlying(pos).pos <= size - && mappingFunc.requires(underlying(pos).t)) + reveal Bind_(); } - function {:opaque} Map_(ghost size: nat, underlying: Parser, mappingFunc: T --> U) + opaque function Map_(underlying: Parser, mappingFunc: R -> U) : (p: Parser) // A parser combinator that makes it possible to transform the result of a parser in another one // The mapping function can be partial - ensures forall pos: nat | MapSpec(size, underlying, mappingFunc, pos) - :: && p.requires(pos) - && (p(pos).PSuccess? ==> pos <= p(pos).pos <= size) + // ensures forall pos | MapSpec(size, underlying, mappingFunc, pos) :: + // p.requires(pos) { - (pos: nat) requires MapSpec(size, underlying, mappingFunc, pos) - => - var (pos, t) :- underlying(pos); - var u := mappingFunc(t); - PSuccess(pos, u) + (input: seq) => + var (result, remaining) :- underlying(input); + var u := mappingFunc(result); + PSuccess(u, remaining) } - ghost predicate ConcatSpec_( - size: nat, - left: Parser, - right: Parser, - pos: nat) - // Encodes the proposition that, if the left parser succeeds on pos, then it should be possible to apply the - // right parser after the left one. + lemma AboutMap_(underlying: Parser, mappingFunc: R -> U, input: seq) + ensures var p := Map_(underlying, mappingFunc); + && (underlying(input).PSuccess? <==> p(input).PSuccess?) + && (p(input).PSuccess? ==> + && p(input).remaining == underlying(input).remaining + && p(input).result == mappingFunc(underlying(input).result)) { - && pos <= size - && left.requires(pos) - && (left(pos).PSuccess? ==> - && pos <= left(pos).pos <= size - && right.requires(left(pos).pos) - && (right(left(pos).pos).PSuccess? ==> - left(pos).pos <= right(left(pos).pos).pos <= size) - ) + reveal Map_(); + reveal Bind_(); + reveal Succeed_(); + } + + function BindMapCallback(mappingFunc: R -> U): + (R, seq) -> Parser + { + (result: R, remaining: seq) => Succeed_(mappingFunc(result)) + } + + lemma AboutMap_Bind_(underlying: Parser, mappingFunc: R -> U, input: seq) + ensures Map_(underlying, mappingFunc)(input) + == Bind_(underlying, BindMapCallback(mappingFunc))(input) + { + reveal Map_(); + reveal Bind_(); + reveal Succeed_(); } - function {:opaque} Concat_(ghost size: nat, + opaque function Concat_( left: Parser, right: Parser ) : (p: Parser<(L, R)>) // Makes it possible to concatenate two consecutive parsers and return the pair of the results - ensures forall pos: nat | - ConcatSpec_(size, left, right, pos) - :: p.requires(pos) - && (p(pos).PSuccess? ==> pos <= p(pos).pos <= size) { - (pos: nat) requires ConcatSpec_(size, left, right, pos) + (input: seq) => - var (pos, l) :- left(pos); - var (pos, r) :- right(pos); - PSuccess(pos, (l, r)) + var (l, remaining) :- left(input); + var (r, remaining2) :- right(remaining); + PSuccess((l, r), remaining2) } - function {:opaque} ConcatR_( - ghost size: nat, + lemma AboutConcat_( + left: Parser, + right: Parser, + input: seq) + ensures var p := Concat_(left, right); + && (p(input).PSuccess? ==> + && left(input).PSuccess? + && p(input).result.0 == left(input).result + && var input2 := left(input).remaining; + && right(input2).PSuccess? + && p(input).result.1 == right(input2).result + && p(input).remaining == right(input2).remaining) + { + reveal Concat_(); + } + + function BindConcatCallback(right: Parser): (L, seq) -> Parser<(L, R)> + { + (l: L, remaining: seq) => + Map_(right, (r: R) => (l, r)) + } + + lemma AboutConcat_Bind_( + left: Parser, + right: Parser, + input: seq) + ensures Concat_(left, right)(input) == Bind_(left, BindConcatCallback(right))(input) + { + reveal Concat_(); + reveal Bind_(); + reveal Succeed_(); + reveal Map_(); + } + + + opaque function ConcatR_( left: Parser, right: Parser ) : (p: Parser) - // Same as Concat but only returns the second result - ensures forall pos: nat | - ConcatSpec_(size, left, right, pos) - :: p.requires(pos) - && (p(pos).PSuccess? ==> pos <= p(pos).pos <= size) + // Return only the result of the right parser if the two parsers match { - (pos: nat) - requires left.requires(pos) - requires left(pos).PSuccess? ==> right.requires(left(pos).pos) + (input: seq) => - var (pos, l) :- left(pos); - var (pos, r) :- right(pos); - PSuccess(pos, r) + var (l, remaining) :- left(input); + var (r, remaining2) :- right(remaining); + PSuccess(r, remaining2) } - function {:opaque} ConcatL_( - ghost size: nat, + lemma AboutConcatR_( + left: Parser, + right: Parser, + input: seq) + ensures var p := ConcatR_(left, right); + && (p(input).PSuccess? ==> + && left(input).PSuccess? + && var input2 := left(input).remaining; + && right(input2).PSuccess? + && p(input).result == right(input2).result + && p(input).remaining == right(input2).remaining) + { + reveal ConcatR_(); + } + + function first(): ((L, R)) -> L { + (lr: (L, R)) => lr.0 + } + function second(): ((L, R)) -> R { + (lr: (L, R)) => lr.1 + } + lemma AboutConcat_ConcatR_( + left: Parser, + right: Parser, + input: seq) + ensures Map_(Concat_(left, right), second())(input) == ConcatR_(left, right)(input) + { + reveal Concat_(); + reveal Succeed_(); + reveal ConcatR_(); + reveal Map_(); + } + + + opaque function ConcatL_( left: Parser, right: Parser ) : (p: Parser) - // Same as Concat but only returns the first result - ensures forall pos: nat | - ConcatSpec_(size, left, right, pos) - :: p.requires(pos) - && (p(pos).PSuccess? ==> pos <= p(pos).pos <= size) + // Return only the result of the right parser if the two parsers match { - (pos: nat) - requires left.requires(pos) - requires left(pos).PSuccess? ==> right.requires(left(pos).pos) + (input: seq) => - var (pos, l) :- left(pos); - var (pos, r) :- right(pos); - PSuccess(pos, l) + var (l, remaining) :- left(input); + var (r, remaining2) :- right(remaining); + PSuccess(l, remaining2) } - ghost predicate RepeatSpec(underlying: Parser, pos: nat, remaining: nat, size: nat) - // Encodes the proposition that, for all position between pos and size included, - // the underlying parser should be able to parse there, and if it succeeds, then - // its output position should be greater than the original parsing position, - // but bounded by size + lemma AboutConcatL_( + left: Parser, + right: Parser, + input: seq) + ensures var p := ConcatL_(left, right); + && (p(input).PSuccess? ==> + && left(input).PSuccess? + && var input2 := left(input).remaining; + && right(input2).PSuccess? + && p(input).result == left(input).result + && p(input).remaining == right(input2).remaining) { - && pos + remaining == size - && (forall pos' | pos <= pos' <= size :: - && underlying.requires(pos') - && (underlying(pos').PSuccess? ==> pos' <= underlying(pos').pos <= size)) + reveal ConcatL_(); } - - function {:opaque} Repeat_( - ghost remaining: nat, - ghost size: nat, - underlying: Parser - ): (p: Parser>) - // Given a parser on a string of size 'size' and with 'remaining' characters to parse - // (typically obtained by size - pos), returns a parser that can repeatedly parse the string - // This parser returns a sequence of all possible parsers - // If the underlying parser fails, then an empty sequence is returned - ensures forall pos: nat | RepeatSpec(underlying, pos, remaining, size) :: - && p.requires(pos) - && (p(pos).PSuccess? ==> pos <= p(pos).pos <= size) - && (p(pos).PFailure? ==> p(pos).level != Recoverable) + lemma AboutConcat_ConcatL_( + left: Parser, + right: Parser, + input: seq) + ensures Map_(Concat_(left, right), first())(input) == ConcatL_(left, right)(input) { - (pos: nat) requires RepeatSpec(underlying, pos, remaining, size) => - match underlying(pos) - case PSuccess(pos', head) => - if pos' <= pos then PSuccess(pos', [head]) else - match Repeat_(remaining-(pos'-pos), size, underlying)(pos') { - case PSuccess(pos'', tail) => - PSuccess(pos'', [head] + tail) - case PFailure(Error, message, pos'') => - PFailure(Error, message, pos') - } - case PFailure(Error, message, pos') => - PFailure(Error, message, pos') - case PFailure(Recoverable, message, pos') => - PSuccess(pos, []) + reveal Concat_(); + reveal Succeed_(); + reveal ConcatL_(); + reveal Map_(); + } + + /* + + opaque function Repeat( + maxPos: nat, + underlying: Parser + ): Parser> { + (pos: nat) => Repeat_(maxPos, underlying, [], pos, 0, pos) } - function {:opaque} {:tailrecursion true} Repeat0( - underlying: Parser, + opaque function {:tailrecursion true} Repeat_( + maxPos: nat, + underlying: Parser, + acc: seq, pos: nat, - acc: seq, - ghost size: nat - ): (p: ParseResult>) - // Alternative version of Repeat that does not return a parser, but directly the ParseResult - decreases size - pos - requires pos <= size - //requires forall pos: nat :: RepeatSpec(underlying, pos, remaining, size) - requires RepeatSpec(underlying, pos, size-pos, size) - ensures p.PFailure? ==> p.level != Recoverable - ensures p.PSuccess? ==> pos <= p.pos <= size + deltaPos: nat, + ghost initPos: nat // it's the invariant + ): (p: ParseResult>) + requires deltaPos == pos - initPos + decreases if pos <= maxPos then 1 + maxPos - pos else 0 + // Alternative tail-recursive version of Repeat that does not return a parser, but directly the ParseResult { + if pos > maxPos then PFailure(Error, "Cannot parse after the provide maximum position", pos, deltaPos) else match underlying(pos) - case PSuccess(pos', head) => - if pos' <= pos then PSuccess(pos', acc + [head]) else - Repeat0(underlying, pos', acc + [head], size) - case PFailure(Error, message, pos') => - PFailure(Error, message, pos') - case PFailure(Recoverable, message, pos') => - PSuccess(pos, acc) + case PSuccess(dpos1, head) => + if dpos1 == 0 then PSuccess(deltaPos, acc + [head]) else + Repeat_(maxPos, underlying, acc + [head], + pos + dpos1, deltaPos + dpos1, initPos) + case PFailure(Error, message, pos', deltaPos') => + PFailure(Error, message, pos', deltaPos') + case PFailure(Recoverable, message, pos', deltaPos') => + PSuccess(deltaPos, acc) } - lemma Repeat0DoesIncreasePosSometimes(underlying: Parser, pos: nat, size: nat) - requires pos <= size - requires RepeatSpec(underlying, pos, size-pos, size) - requires underlying.requires(pos) && underlying(pos).PSuccess? && pos < underlying(pos).pos - ensures - var result := Repeat0(underlying, pos, [], size); - result.PSuccess? && 0 < |result.t| ==> pos < result.pos + predicate ParserStaysWithin(underlying: Parser, maxPos: nat) { + forall pos: nat | 0 <= pos <= maxPos :: + && (underlying(pos).PFailure? ==> underlying(pos).level == Recoverable) + && (underlying(pos).PSuccess? ==> + pos + underlying(pos).deltaPos <= maxPos) + } + + lemma AboutRepeat_( + maxPos: nat, + underlying: Parser, + acc: seq, + pos: nat, + deltaPos: nat, + initPos: nat + ) + requires deltaPos == pos - initPos + // If underlying never throws a fatal error, + // returns a delta position that stays within the limit of maxPos, + // then Repeat with alwyas return a success, provided it's called with an adequate pos + decreases if pos <= maxPos then 1 + maxPos - pos else 0 + requires ParserStaysWithin(underlying, maxPos) + ensures var p := Repeat_(maxPos, underlying, acc, pos, deltaPos, initPos); + && (pos <= maxPos ==> p.PSuccess?) + && (p.PFailure? ==> p.level == Error && pos > maxPos) { - reveal Repeat0(); + reveal Repeat_(); } + predicate AboutRepeatIncreasesPosIfUnderlyingSucceedsAtLeastOnceEnsures( + maxPos: nat, + underlying: Parser, + acc: seq, + pos: nat, + deltaPos: nat, + initPos: nat + ) + requires deltaPos == pos - initPos + { + var result := Repeat_(maxPos, underlying, acc, pos, deltaPos, initPos); + && result.PSuccess? + && |acc| <= |result.result| + && pos <= initPos + result.deltaPos <= maxPos + && (underlying(pos).PSuccess? && 0 < underlying(pos).deltaPos + ==> + (|acc| < |result.result| && 0 < result.deltaPos)) + } - function {:opaque} RepeatAcc_( - underlying: Parser, + lemma AboutRepeatIncreasesPosIfUnderlyingSucceedsAtLeastOnce( + maxPos: nat, + underlying: Parser, + acc: seq, pos: nat, - init: I, - combine: (I, T) -> I, - ghost size: nat - ): (p: ParseResult) - // Alternative version of Repeat that does not return a parser, but directly the ParseResult - decreases size - pos - requires pos <= size - //requires forall pos: nat :: RepeatSpec(underlying, pos, remaining, size) - requires RepeatSpec(underlying, pos, size-pos, size) - ensures p.PFailure? ==> p.level != Recoverable - ensures p.PSuccess? ==> pos <= p.pos <= size + deltaPos: nat, + initPos: nat + ) + decreases if pos <= maxPos then 1 + maxPos - pos else 0 + requires deltaPos == pos - initPos + requires pos <= maxPos + requires ParserStaysWithin(underlying, maxPos) + ensures + AboutRepeatIncreasesPosIfUnderlyingSucceedsAtLeastOnceEnsures + (maxPos, underlying, acc, pos, deltaPos, initPos) { + var result := Repeat_(maxPos, underlying, acc, pos, deltaPos, initPos); + if pos > maxPos { + return; + } + reveal Repeat_(); match underlying(pos) - case PSuccess(pos', head) => - if pos' <= pos then PSuccess(pos', combine(init, head)) else - match RepeatAcc_(underlying, pos', combine(init, head), combine, size) { - case PSuccess(pos'', tail) => - PSuccess(pos'', tail) - case PFailure(Error, message, pos'') => - PFailure(Error, message, pos') + case PSuccess(dpos1, head) => + if dpos1 == 0 { + } else { + AboutRepeatIncreasesPosIfUnderlyingSucceedsAtLeastOnce + (maxPos, underlying, acc + [head], + pos + dpos1, deltaPos + dpos1, initPos); } - case PFailure(Error, message, pos') => - PFailure(Error, message, pos') - case PFailure(Recoverable, message, pos') => - PSuccess(pos, init) + case PFailure(Error, message, pos', deltaPos') => + case PFailure(Recoverable, message, pos', deltaPos') => } - ghost predicate FixSpecInner(size: nat, callback: Parser, u: nat) + ghost predicate FixSpecInner(maxPos: nat, callback: Parser, u: nat) // Specificaiton for Fixpoint. In other terms, verifies that the callback parser // accepts all positions between u (exclusive) and size (inclusive) { - && u <= size - && forall u': nat | u < u' <= size :: - callback.requires(u') - && (callback(u').PSuccess? ==> u' <= callback(u').pos <= size) + && u <= maxPos + && forall u': nat | u < u' <= maxPos :: + && (callback(u').PSuccess? ==> u' + callback(u').deltaPos <= maxPos) } - function {:opaque} Fixpoint_( - ghost size: nat, - underlying: (Parser, nat) --> ParseResult, + opaque function Fixpoint( + maxPos: nat, + underlying: (Parser, nat) -> ParseResult + ): (p: Parser) + // Given a function that requires a parser and a position to return a parse result, + // provide this function the Fixpoint() parser itself + // so that it makes it possible to iteratively parse the result + { + (pos: nat) => Fixpoint_(maxPos, underlying, pos) + } + + opaque function Fixpoint_( + maxPos: nat, + underlying: (Parser, nat) -> ParseResult, pos: nat - ): (p: ParseResult) + ): (p: ParseResult) // Given a function that combines a (recursive) parser and a position to obtain a parse result, // returns the parse result associated to recursively applying the function. // If partially applied on "underlying" and "size", it would returns the solution to the equation: // f = pos => underlying(f, pos) - decreases size - pos - requires pos <= size - requires - forall callback: Parser, u: nat | FixSpecInner(size, callback, u) :: - && underlying.requires(callback, u) - && (underlying(callback, u).PSuccess? ==> u <= underlying(callback, u).pos <= size) - ensures p.PSuccess? ==> pos <= p.pos <= size + decreases maxPos - pos { - var callback: Parser := (pos': nat) requires pos < pos' <= size => - Fixpoint_(size, underlying, pos'); + var callback: Parser := + (pos': nat) => + if pos < pos' <= maxPos then + Fixpoint_(maxPos, underlying, pos') + else if pos' <= pos then + PFailure(Recoverable, "No progress", pos', 0) + else + PFailure(Recoverable, "Parsing extended out of selected size", pos', 0); underlying(callback, pos) } - ghost predicate FixMapSpecInner(fun: string, functions: set, size: nat, callback: string --> nat --> ParseResult, u: nat) + predicate AboutFixpoint_Ensures( + maxPos: nat, + underlying: (Parser, nat) -> ParseResult, + pos: nat) + { + var p := Fixpoint_(maxPos, underlying, pos); + p.PSuccess? ==> pos + p.deltaPos <= maxPos + } + lemma AboutFixpoint_( + maxPos: nat, + underlying: (Parser, nat) -> ParseResult, + pos: nat) + requires + forall callback: Parser, u: nat + | underlying(callback, u).PSuccess? + :: u + underlying(callback, u).deltaPos <= maxPos + ensures AboutFixpoint_Ensures(maxPos, underlying, pos) + { + reveal Fixpoint_(); + } + opaque function FixpointMap( + maxPos: nat, + underlying: map Option>, nat) -> ParseResult>, + fun: string): Parser + { + (pos: nat) => FixpointMap_(maxPos, underlying, fun, pos) + } + + opaque function FixpointMap_( + maxPos: nat, + underlying: map Option>, nat) -> ParseResult>, + fun: string, + pos: nat + ): (p: ParseResult) + // Given a function that combines a (recursive) parser selector and a position to obtain a parse result, + // returns the parse result associated to recursively applying the function. + // If partially applied on "underlying" and "fun", it would return the solution f<"fun"> to the equations: + // f = pos => underlying[fun](f, pos) + decreases maxPos - pos, |fun| + { + if fun !in underlying then PFailure(Error, "Parser '"+fun+"' not found", pos, 0) else + var callback: string -> Option> + := + (fun': string) => + if fun' !in underlying.Keys then + None + else + Some( + (pos': nat) => + if pos < pos' <= maxPos || (pos' == pos && |fun'| < |fun|) then + FixpointMap_(maxPos, underlying, fun', pos') + else if pos' == pos then + PFailure(Recoverable, "Non progressing recursive call requires that '"+fun'+"' be shorter than '"+fun+"'", pos', 0) + else + PFailure(Error, "Parser did something unexpected, jump to position " + Printer.natToString(pos'), pos', 0) + ); + underlying[fun](callback, pos) + } + predicate AboutFixpointMap_Ensures( + maxPos: nat, + underlying: map Option>, nat) -> ParseResult>, + fun: string, + pos: nat) { + var p := FixpointMap_(maxPos, underlying, fun, pos); + && (p.PSuccess? ==> pos + p.deltaPos <= maxPos) + // Also, need to prove that if underlying never fails, then fixpoint never fails as well + } + + ghost predicate FixMapSpecInnerInner( + fun: string, fun': string, functions: set, maxPos: nat, callback: string -> Option>, u: nat) + { + forall u': nat | u < u' <= maxPos || (u == u' && |fun'| < |fun|) :: + && callback(fun').Some? + && var x := callback(fun').value(u'); + && (x.PSuccess? ==> u' + x.deltaPos <= maxPos) + } + + ghost predicate FixMapSpecInner(fun: string, functions: set, maxPos: nat, callback: string -> Option>, u: nat) // Specification for FixpointMap. // Ensures that, for any other function, if this function is in the set of admissible `functions`, // then callback should not only accept it, but then accept any position at a second argument if // 1) This position is strictly greater than the current position u // 2) Or this position is the same but the function name is smaller. { - && u <= size + && u <= maxPos && forall fun': string <- functions :: - && callback.requires(fun') - && forall u': nat | u < u' <= size || (u == u' && |fun'| < |fun|) :: - callback(fun').requires(u') - && var x := callback(fun')(u'); - && (x.PSuccess? ==> u' <= x.pos <= size) + FixMapSpecInnerInner(fun, fun', functions, maxPos, callback, u) } - ghost predicate FixpointMapSpecOnce(fun': string, impl: ParserMapper, otherFuns: set, size: nat) + ghost predicate FixpointMapSpecOnce(fun': string, impl: ParserMapper, otherFuns: set, maxPos: nat) { - forall callback: ParserSelector, u: nat | - && FixMapSpecInner(fun', otherFuns, size, callback, u) - :: impl.requires(callback, u) - && var x := impl(callback, u); - && (x.PSuccess? ==> u <= x.pos <= size) + forall callback: ParserSelector, u: nat | + && FixMapSpecInner(fun', otherFuns, maxPos, callback, u) + :: var x := impl(callback, u); + && (x.PSuccess? ==> u + x.deltaPos <= maxPos) } - function {:opaque} FixpointMap_( - ghost size: nat, - underlying: map Parser, nat) --> ParseResult>, + lemma AboutFixpointMap_( + maxPos: nat, + underlying: map Option>, nat) -> ParseResult>, fun: string, pos: nat - ): (p: ParseResult) - // Given a function that combines a (recursive) parser selector and a position to obtain a parse result, - // returns the parse result associated to recursively applying the function. - // If partially applied on "underlying" and "fun", it would return the solution f<"fun"> to the equations: - // f = pos => underlying[fun](f, pos) - decreases size - pos, |fun| - requires pos <= size - requires - && fun in underlying.Keys - && forall fun': string <- underlying.Keys :: - FixpointMapSpecOnce(fun', underlying[fun'], underlying.Keys, size) - ensures p.PSuccess? ==> pos <= p.pos <= size + ) + requires pos <= maxPos + requires + forall fun' <- underlying.Keys :: + FixpointMapSpecOnce(fun', underlying[fun'], underlying.Keys, maxPos) + ensures + AboutFixpointMap_Ensures(maxPos, underlying, fun, pos) { - var callback: string --> nat --> ParseResult - := (fun': string) requires fun' in underlying.Keys => - (pos': nat) requires pos < pos' <= size || (pos' == pos && |fun'| < |fun|) => - FixpointMap_(size, underlying, fun', pos'); - underlying[fun](callback, pos) + reveal FixpointMap_(); + var p := FixpointMap_(maxPos, underlying, fun, pos); + + var callback: string -> Option> + := + (fun': string) => + if fun' !in underlying.Keys then + None + else + Some( + (pos': nat) => + if pos < pos' <= maxPos || (pos' == pos && |fun'| < |fun|) then + FixpointMap_(maxPos, underlying, fun', pos') + else if pos' == pos then + PFailure(Recoverable, "Non progressing recursive call requires that '"+fun'+"' be shorter than '"+fun+"'", pos', 0) + else + PFailure(Error, "Parser did something unexpected, jump to position " + Printer.natToString(pos'), pos', 0) + ); + if fun in underlying { + assert {:only} FixMapSpecInner(fun, underlying.Keys, maxPos, callback, pos) by { + assume && pos <= maxPos + && forall fun': string <- underlying.Keys :: + FixMapSpecInnerInner(fun, fun', underlying.Keys, maxPos, callback, pos); + assume false; + } + assert p == underlying[fun](callback, pos); + assert FixpointMapSpecOnce(fun, underlying[fun], underlying.Keys, maxPos); + var impl := underlying[fun]; + assert forall callback: ParserSelector, u: nat | + && FixMapSpecInner(fun, underlying.Keys, maxPos, callback, u) + :: var x := impl(callback, u); + && (x.PSuccess? ==> u + x.deltaPos <= maxPos); + assert p.PSuccess? ==> pos + p.deltaPos <= maxPos; + } else { + } } - ghost predicate OrSpec( + ghost predicate OrSpec( size: nat, - left: Parser, - right: Parser, + left: Parser, + right: Parser, pos: nat) // Verifies that the two parsers can both be applied at the given position { @@ -405,16 +639,16 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} Parsers { && (left(pos).PSuccess? ==> pos <= left(pos).pos <= size) && right.requires(pos) && (right(pos).PSuccess? ==> pos <= right(pos).pos <= size) - // Since requires are only used for progression, I don't have a use case in which the following is useful + // Since requires are only used for progression, I don'result have a use case in which the following is useful /*&& var l := left(pos); && (l.PFailure? && l.level == Recoverable ==> right.requires(pos))*/ } - function {:opaque} Or_( + opaque function Or_( ghost size: nat, - left: Parser, - right: Parser - ) : (p: Parser) + left: Parser, + right: Parser + ) : (p: Parser) // Builds a parser from left and right such that, if left fails and is recoverable, then right is used instead. ensures forall pos: nat | OrSpec(size, left, right, pos) :: p.requires(pos) @@ -436,7 +670,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} Parsers { && (right(pos).PSuccess? ==> pos <= right(pos).pos <= size)) } - function {:opaque} EitherP_( + opaque function EitherP_( ghost size: nat, left: Parser, right: Parser @@ -461,56 +695,59 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserTheorems { import opened Parsers // The remaining are interesting proofs about some equivalence but are not useful - function {:opaque} Map__(ghost size: nat, underlying: Parser, mappingFunc: T --> U) + opaque function Map__(ghost size: nat, underlying: Parser, mappingFunc: R --> U) : (p: Parser) // Map is equivalent to a bind method: ensures forall pos: nat | MapSpec(size, underlying, mappingFunc, pos) :: p.requires(pos) { - var p := Bind_(size, underlying, (t: T, pos': nat) requires mappingFunc.requires(t) => Succeed_(size, mappingFunc(t))); + var p := Bind_(size, underlying, (result: R, pos': nat) requires mappingFunc.requires(result) => Succeed_(size, mappingFunc(result))); assert forall pos: nat | MapSpec(size, underlying, mappingFunc, pos) :: p.requires(pos) by { forall pos: nat | MapSpec(size, underlying, mappingFunc, pos) ensures p.requires(pos) { + AboutMap_(size, underlying, mappingFunc, pos); var left := underlying; - var right := (t: T, pos': nat) requires mappingFunc.requires(t) => Succeed_(size, mappingFunc(t)); + var right := (result: R, pos': nat) requires mappingFunc.requires(result) => Succeed_(size, mappingFunc(result)); assert BindSpec(size, left, right, pos); } } p } - lemma Map_Map2(size: nat, underlying: Parser, mappingFunc: T --> U, pos: nat) + lemma Map_Map2(size: nat, underlying: Parser, mappingFunc: R --> U, pos: nat) requires MapSpec(size, underlying, mappingFunc, pos) ensures - && Map__(size, underlying, mappingFunc)(pos) == Map_(size, underlying, mappingFunc)(pos) + && Map__(size, underlying, mappingFunc)(pos) == Map_(size, underlying, mappingFunc)(pos) { reveal Map_(); reveal Map__(); reveal Bind_(); + reveal Succeed_(); } - function {:opaque} Concat__(ghost size: nat, left: Parser, right: Parser) - : (p: Parser<(T, U)>) + opaque function Concat__(ghost size: nat, left: Parser, right: Parser) + : (p: Parser<(R, U)>) // Concat is equivalent to two binds methods ensures forall pos: nat | ConcatSpec_(size, left, right, pos) :: p.requires(pos) { - Bind_(size, left, (t: T, pos': nat) requires right.requires(pos') => - Bind_(size, right, (u: U, pos'': nat) => Succeed_(size, (t, u)))) + Bind_(size, left, (result: R, pos': nat) requires right.requires(pos') => + Bind_(size, right, (u: U, pos'': nat) => Succeed_(size, (result, u)))) } - lemma Concat_Concat2(size: nat, left: Parser, right: Parser, pos: nat) + lemma Concat_Concat2(size: nat, left: Parser, right: Parser, pos: nat) requires ConcatSpec_(size, left, right, pos) - ensures BindSpec(size, left, (t: T, pos': nat) requires right.requires(pos') => - Bind_(size, right, (u: U, pos'': nat) => Succeed_(size, (t, u))), pos) + ensures BindSpec(size, left, (result: R, pos': nat) requires right.requires(pos') => + Bind_(size, right, (u: U, pos'': nat) => Succeed_(size, (result, u))), pos) // TODO: Bug to report. Concat_() should not be needed ensures Concat_(size, left, right)(pos) == Concat__(size, left, right)(pos) { reveal Bind_(); reveal Concat_(); reveal Concat__(); + reveal Succeed_(); } } @@ -534,10 +771,10 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserBuilders { //import opened ParserEngine // Wrap the constructor in a class where the size is constant so that users - // don't need to provide it. - datatype ParserBuilder = B_(ghost size: nat, apply: Parser) + // don'result need to provide it. + datatype ParserBuilder = B_(ghost size: nat, apply: Parser) { - function {:opaque} o_I(other: ParserBuilder): (p: ParserBuilder) + opaque function o_I(other: ParserBuilder): (p: ParserBuilder) requires size == other.size ensures p.size == size ensures forall pos: nat | @@ -547,7 +784,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserBuilders { { B_(size, ConcatR_(size, apply, other.apply)) } - function {:opaque} I_o(other: ParserBuilder): (p: ParserBuilder) + opaque function I_o(other: ParserBuilder): (p: ParserBuilder) requires size == other.size ensures p.size == size ensures forall pos: nat | @@ -557,7 +794,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserBuilders { { B_(size, ConcatL_(size, apply, other.apply)) } - function {:opaque} M(mappingFunc: T --> U): (p: ParserBuilder) + opaque function M(mappingFunc: R --> U): (p: ParserBuilder) ensures p.size == size ensures forall pos: nat | MapSpec(size, apply, mappingFunc, pos) @@ -566,7 +803,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserBuilders { { B_(size, Map_(size, apply, mappingFunc)) } - function {:opaque} O(other: ParserBuilder): (p: ParserBuilder) + opaque function O(other: ParserBuilder): (p: ParserBuilder) requires size == other.size ensures size == p.size ensures forall pos: nat | @@ -577,20 +814,20 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserBuilders { B_(size, Or_(size, apply, other.apply)) } - function {:opaque} Then(other: (T, nat) --> ParserBuilder): (p: ParserBuilder) + opaque function Then(other: (R, nat) --> ParserBuilder): (p: ParserBuilder) ensures size == p.size ensures forall pos: nat | - BindSpec(size, apply, (t: T, pos': nat) requires other.requires(t, pos') => other(t, pos').apply, pos) + BindSpec(size, apply, (result: R, pos': nat) requires other.requires(result, pos') => other(result, pos').apply, pos) :: p.apply.requires(pos) && ( p.apply(pos).PSuccess? ==> && apply(pos).PSuccess? && apply(pos).pos <= p.apply(pos).pos <= size) { - B_(size, Bind_(size, apply, (t: T, pos': nat) requires other.requires(t, pos') => other(t, pos').apply)) + B_(size, Bind_(size, apply, (result: R, pos': nat) requires other.requires(result, pos') => other(result, pos').apply)) } - function {:opaque} Repeat(init: T, combine: (T, T) -> T): (p: ParserBuilder) + opaque function Repeat(init: R, combine: (R, R) -> R): (p: ParserBuilder) ensures size == p.size ensures forall pos: nat | pos <= size && RepeatSpec(apply, pos, size-pos, size) :: p.apply.requires(pos) @@ -604,7 +841,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserBuilders { => RepeatAcc_(apply, pos, init, combine, size)) } } - function {:opaque} FirstOf_(ghost size: nat, others: seq>): (p: ParserBuilder) + opaque function FirstOf_(ghost size: nat, others: seq>): (p: ParserBuilder) requires |others| > 0 requires forall other <- others :: other.size == size ensures p.size == size @@ -624,19 +861,19 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserBuilders { } p } - function R_(ghost size: nat, t: T): (p: ParserBuilder) + function R_(ghost size: nat, result: R): (p: ParserBuilder) { - B_(size, Succeed_(size, t)) + B_(size, Succeed_(size, result)) } - datatype FixMapParserBuilder = FixMapParserBuilder(ghost size: nat, ghost functions: set, underlying: map> := map[]) + datatype FixMapParserBuilder = FixMapParserBuilder(ghost size: nat, ghost functions: set, underlying: map> := map[]) { - static function Empty(ghost size: nat, ghost functions: set): (b: FixMapParserBuilder) ensures b.Valid() { + static function Empty(ghost size: nat, ghost functions: set): (b: FixMapParserBuilder) ensures b.Valid() { FixMapParserBuilder(size, functions, map[]) } ghost predicate Valid() { forall fun <- underlying :: FixpointMapSpecOnce(fun, underlying[fun], functions, size) } - function {:opaque} Add(name: string, mapper: ParserMapper): (f: FixMapParserBuilder) + opaque function Add(name: string, mapper: ParserMapper): (f: FixMapParserBuilder) requires Valid() requires name !in underlying requires FixpointMapSpecOnce(name, mapper, functions, size) @@ -656,7 +893,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserBuilders { module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { import opened ParserBuilders import opened Parsers - import opened Library + import opened Wrappers import opened Printer // Engine defines the following parsers: @@ -689,14 +926,14 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { trait {:termination false} Engine { const input: string - function {:opaque} Succeed(t: T): (p: Parser) + opaque function Succeed(result: R): (p: Parser) ensures forall pos: nat | pos <= |input| :: p.requires(pos) && p(pos).PSuccess? && pos == p(pos).pos <= |input| // A parser that does not consume any input and returns the given value { - (pos: nat) requires pos <= |input| => PSuccess(pos, t) + (pos: nat) requires pos <= |input| => PSuccess(pos, result) } - function {:opaque} Bind( + opaque function Bind( left: Parser, right: (L, nat) --> Parser ) : (p: Parser) @@ -706,19 +943,19 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { && (p(pos).PSuccess? ==> && left(pos).PSuccess? && left(pos).pos <= p(pos).pos <= |input| - && p(pos) == right(left(pos).t, left(pos).pos)(left(pos).pos)) + && p(pos) == right(left(pos).result, left(pos).pos)(left(pos).pos)) { Bind_(|input|, left, right) } - function {:opaque} Epsilon(pos: nat): (pr: ParseResult<()>) + opaque function Epsilon(pos: nat): (pr: ParseResult<()>) requires pos <= |input| ensures pr.PSuccess? && pr.pos == pos { Epsilon_(|input|)(pos) } - function {:opaque} Map(underlying: Parser, mappingFunc: T --> U) + opaque function Map(underlying: Parser, mappingFunc: R --> U) : (p: Parser) // A parser combinator that makes it possible to transform the result of a parser in another one // The mapping function can be partial @@ -729,12 +966,12 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { Map_(|input|, underlying, mappingFunc) } - ghost predicate MapFailureSpec(pos: nat, underlying: Parser) { + ghost predicate MapFailureSpec(pos: nat, underlying: Parser) { pos <= |input| && underlying.requires(pos) } - function {:opaque} MapFailure(underlying: Parser, mappingFunc: ParseResult --> ParseResult) - : (p: Parser) + opaque function MapFailure(underlying: Parser, mappingFunc: ParseResult --> ParseResult) + : (p: Parser) requires forall p: ParseResult | p.PFailure? :: mappingFunc.requires(p) && mappingFunc(p).PFailure? requires forall pos: nat | pos <= |input| :: && underlying.requires(pos) @@ -754,7 +991,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { ConcatSpec_(|input|, left, right, pos) } - function {:opaque} Concat( + opaque function Concat( left: Parser, right: Parser ) : (p: Parser<(L, R)>) @@ -767,7 +1004,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { Concat_(|input|, left, right) } - function {:opaque} ConcatR( + opaque function ConcatR( left: Parser, right: Parser ) : (p: Parser) @@ -780,7 +1017,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { ConcatR_(|input|, left, right) } - function {:opaque} ConcatL( + opaque function ConcatL( left: Parser, right: Parser ) : (p: Parser) @@ -793,10 +1030,10 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { ConcatL_(|input|, left, right) } - function {:opaque} Or( - left: Parser, - right: Parser - ) : (p: Parser) + opaque function Or( + left: Parser, + right: Parser + ) : (p: Parser) // Builds a parser from left and right such that, if left fails and is recoverable, then right is used instead. ensures forall pos: nat | OrSpec(|input|, left, right, pos) @@ -806,7 +1043,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { Or_(|input|, left, right) } - function {:opaque} EitherP( + opaque function EitherP( left: Parser, right: Parser ) : (p: Parser>) @@ -819,7 +1056,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { EitherP_(|input|, left, right) } - function {:opaque} Char(c: char): (p: Parser) + opaque function Char(c: char): (p: Parser) ensures forall pos: nat :: p.requires(pos) ensures forall pos: nat :: p(pos).PSuccess? ==> pos < |input| && p(pos).pos == pos + 1 @@ -829,7 +1066,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { else PFailure(Error, "Expected '"+[c]+"'", pos) } - function {:opaque} Char?(c: char): (p: Parser) + opaque function Char?(c: char): (p: Parser) ensures forall pos: nat :: p.requires(pos) ensures forall pos: nat :: p(pos).PSuccess? ==> pos < |input| && p(pos).pos == pos + 1 @@ -840,7 +1077,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { } // Returns a function that tests if, at the given position, we can find the string toTest - function {:opaque} TestString(toTest: string): (test: nat --> bool) + opaque function TestString(toTest: string): (test: nat --> bool) ensures forall pos: nat | pos <= |input| :: test.requires(pos) { (pos: nat) requires pos <= |input| => @@ -848,14 +1085,14 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { } // Returns a function that tests if, at the given position, we can find the string toTest - function {:opaque} TestNotString(toTest: string): (test: nat --> bool) + opaque function TestNotString(toTest: string): (test: nat --> bool) ensures forall pos: nat | pos <= |input| :: test.requires(pos) { (pos: nat) requires pos <= |input| => !(pos + |toTest| <= |input| && input[pos..pos+|toTest|] == toTest) } - function {:opaque} CharTest?(test: nat --> bool): (p: Parser) + opaque function CharTest?(test: nat --> bool): (p: Parser) requires forall pos: nat | pos < |input| :: test.requires(pos) ensures forall pos: nat | pos <= |input| :: p.requires(pos) && @@ -873,25 +1110,25 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { pos + |expected| <= |input| && p(pos).pos == pos + |expected|) } - function {:opaque} C(expected: string): (p: ParserBuilder) + opaque function C(expected: string): (p: ParserBuilder) ensures p.size == |input| ensures ConstSpec(expected, p.apply) { B_(|input|, Const(expected)) } - function {:opaque} C?(expected: string): (p: ParserBuilder) + opaque function C?(expected: string): (p: ParserBuilder) ensures p.size == |input| ensures ConstSpec(expected, p.apply) { B_(|input|, Const?(expected)) } - function B(underlying: Parser): (p: ParserBuilder) + function B(underlying: Parser): (p: ParserBuilder) ensures p.size == |input| { B_(|input|, underlying) } - function {:opaque} Const(expected: string): (p: Parser) + opaque function Const(expected: string): (p: Parser) ensures ConstSpec(expected, p) { (pos: nat) => @@ -899,7 +1136,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { else PFailure(Error, "Expected '"+expected+"'", pos) } - function {:opaque} Const?(expected: string): (p: Parser) + opaque function Const?(expected: string): (p: Parser) ensures ConstSpec(expected, p) { (pos: nat) => @@ -907,20 +1144,20 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { else PFailure(Recoverable, "Possibly expecting something else but that's ok", pos) } - function {:opaque} Maybe(underlying: Parser): (p: Parser>) + opaque function Maybe(underlying: Parser): (p: Parser>) requires IsRegular(underlying) ensures IsRegular(p) { - Or(Map(underlying, (t: T) => Some(t)), Succeed(None)) + Or(Map(underlying, (result: R) => Some(result)), Succeed(None)) } - function {:opaque} Newline(): (p: Parser) + opaque function Newline(): (p: Parser) ensures IsRegular(p) { Or(Const?("\r\n"), Or(Const?("\r"), Const("\n"))) } - function {:opaque} Test?(test: (string, nat) --> bool): (p: Parser<()>) + opaque function Test?(test: (string, nat) --> bool): (p: Parser<()>) requires forall pos: nat | pos <= |input| :: test.requires(input, pos) ensures forall pos: nat | pos <= |input| :: p.requires(pos) && @@ -933,14 +1170,14 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { // Given a test on a position, returns a parser that succeeds with the longest string // starting at a given position which succeeds the test on every character // If the test succeeds immediately, returns a recoverable failure instead - function {:opaque} While?(test: nat --> bool): (p: Parser) + opaque function While?(test: nat --> bool): (p: Parser) requires forall pos: nat | pos <= |input| :: test.requires(pos) ensures forall pos: nat | pos <= |input| :: p.requires(pos) && (p(pos).PSuccess? ==> pos < p(pos).pos <= |input|) { var p := Bind(Rep(CharTest?(test)), (result: string, pos': nat) => - if result == "" then Fail("Did not find an non-empty string satisfying test", Recoverable) + if result == "" then Fail_("Did not find an non-empty string satisfying test", Recoverable) else Succeed(result)); assert forall pos: nat | pos <= |input| :: p.requires(pos) && (p(pos).PSuccess? ==> pos < p(pos).pos <= |input|) by { @@ -955,7 +1192,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { p } - function {:opaque} EverythingUntilAndDrop(str: string): (p: Parser) + opaque function EverythingUntilAndDrop(str: string): (p: Parser) ensures forall pos: nat | pos <= |input| :: p.requires(pos) && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) @@ -964,14 +1201,14 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { } - ghost predicate RepSpec(underlying: Parser, pos: nat) { + ghost predicate RepSpec(underlying: Parser, pos: nat) { && pos <= |input| && (forall pos' | pos <= pos' <= |input| :: && underlying.requires(pos') && (underlying(pos').PSuccess? ==> pos' <= underlying(pos').pos <= |input|)) } - function {:opaque} Rep(underlying: Parser): (p: Parser>) + opaque function Rep(underlying: Parser): (p: Parser>) ensures forall pos: nat | RepSpec(underlying, pos) :: p.requires(pos) && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) @@ -982,22 +1219,22 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { Repeat0(underlying, pos, [], |input|) } - lemma RepDoesIncreasePosSometimes(underlying: Parser, pos: nat) + lemma RepDoesIncreasePosSometimes(underlying: Parser, pos: nat) requires pos <= |input| && RepeatSpec(underlying, pos, |input|-pos, |input|) requires underlying.requires(pos) && underlying(pos).PSuccess? ==> pos < underlying(pos).pos ensures var p := Rep(underlying); - (p(pos).PSuccess? && |p(pos).t| > 0 ==> pos < p(pos).pos) + (p(pos).PSuccess? && |p(pos).result| > 0 ==> pos < p(pos).pos) { reveal Rep(); reveal Repeat0(); } - function {:opaque} RepAcc( - underlying: Parser, + opaque function RepAcc( + underlying: Parser, init: I, - combine: (I, T) -> I + combine: (I, R) -> I ): (p: Parser) ensures forall pos: nat | RepSpec(underlying, pos) :: && p.requires(pos) @@ -1007,16 +1244,16 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { RepeatAcc_(underlying, pos, init, combine, |input|) } - ghost predicate FixSpec(size: nat, underlying: (Parser, nat) --> ParseResult, pos: nat) { + ghost predicate FixSpec(size: nat, underlying: (Parser, nat) --> ParseResult, pos: nat) { && pos <= size - && forall callback: Parser, u: nat | + && forall callback: Parser, u: nat | FixSpecInner(size, callback, u) :: underlying.requires(callback, u) && (underlying(callback, u).PSuccess? ==> u <= underlying(callback, u).pos <= size) } - function GetFixBase(): map> { map[] } + function GetFixBase(): map> { map[] } - function {:opaque} Fix(underlying: (Parser, nat) --> ParseResult): (p: Parser) + opaque function Fix(underlying: (Parser, nat) --> ParseResult): (p: Parser) ensures forall pos: nat| FixSpec(|input|, underlying, pos) :: p.requires(pos) && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) @@ -1024,7 +1261,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { (pos: nat) requires FixSpec(|input|, underlying, pos) => - Fixpoint_( + Fixpoint_( |input|, underlying, pos @@ -1032,20 +1269,20 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { } // TODO: Dafny clinic. Cannot make it opaque, otherwise // even with reveal cannot verify other functions - ghost predicate FixMapInnerOnce(fun: string, impl: ParserMapper, otherFuns: set, size: nat) + ghost predicate FixMapInnerOnce(fun: string, impl: ParserMapper, otherFuns: set, size: nat) { FixpointMapSpecOnce(fun, impl, otherFuns, |input|) } - ghost predicate FixMapInner(size: nat, underlying: map>) { + ghost predicate FixMapInner(size: nat, underlying: map>) { forall fun: string <- underlying.Keys :: FixMapInnerOnce(fun, underlying[fun], underlying.Keys, size) } - function {:opaque} FixMap( - underlying: map>, + opaque function FixMap( + underlying: map>, fun: string - ): (p: Parser) + ): (p: Parser) requires {:error "Second argument of FixMap might not be a key of the first"} fun in underlying.Keys requires {:error "First argument might not satisfy FixMapInner(|input|, arg). Said otherwise, key/value pairs of the first argument might not all satisfy FixMapInnerOnce(key, value, keys, |input|)"} FixMapInner(|input|, underlying) ensures forall pos: nat | pos <= |input| :: @@ -1054,27 +1291,27 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { { (pos: nat) requires pos <= |input| => //reveal FixMapInnerOnce(); - FixpointMap_( + FixpointMap_( |input|, underlying, fun, pos ) } - function {:opaque} FixMapBuilder(ghost functions: set): (r: FixMapParserBuilder) + opaque function FixMapBuilder(ghost functions: set): (r: FixMapParserBuilder) ensures r.Valid() ensures |r.underlying.Keys| == 0 ensures r.functions == functions ensures r.size == |input| { - var underlying: map> := map[]; + var underlying: map> := map[]; FixMapParserBuilder(|input|, functions, underlying) } const EOS: Parser<()> := (pos: nat) => if pos >= |input| then PSuccess(pos, ()) else PFailure(Error, "Expected end of string", pos) - function {:opaque} FirstOf(others: seq>): (p: ParserBuilder) + opaque function FirstOf(others: seq>): (p: ParserBuilder) requires |others| > 0 requires forall other <- others :: other.size == |input| ensures @@ -1087,22 +1324,22 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { FirstOf_(|input|, others) } - function R(t: T): (p: ParserBuilder) + function R(result: R): (p: ParserBuilder) { - B(Succeed(t)) + B(Succeed(result)) } const natToDigit: seq := "0123456789" const digitToNat: map := map i | 0 <= i < |natToDigit| :: natToDigit[i] := i - function {:opaque} Digit(test: bool := true): (p: Parser) + opaque function Digit(test: bool := true): (p: Parser) ensures forall pos:nat | pos <= |input| :: && p.requires(pos) && (p(pos).PSuccess? ==> && pos < |input| && input[pos] in digitToNat - && digitToNat[input[pos]] == p(pos).t - && 0 <= p(pos).t <= 9 + && digitToNat[input[pos]] == p(pos).result + && 0 <= p(pos).result <= 9 && p(pos).pos == pos + 1) { (pos: nat) requires pos <= |input| => @@ -1112,12 +1349,12 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { else PFailure(if test then Recoverable else Error, "Expected a digit", pos) } - ghost predicate RecSpec(fun: string, otherFuns: set, rec: string --> Parser, pos: nat) + ghost predicate RecSpec(fun: string, otherFuns: set, rec: string --> Parser, pos: nat) { FixMapSpecInner(fun, otherFuns, |input|, rec, pos) } - ghost predicate RecSpecOnce(fun: string, otherFuns: set, mapper: ParserMapper) { + ghost predicate RecSpecOnce(fun: string, otherFuns: set, mapper: ParserMapper) { FixMapInnerOnce(fun, mapper, otherFuns, |input|) } @@ -1125,7 +1362,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { // but I'm missing the ability to call another parser builder with the result of the first one // to avoid callbacks. - function {:opaque} {:vcs_split_on_every_assert} Nat(test: bool := true): (p: Parser) + opaque function {:vcs_split_on_every_assert} Nat(test: bool := true): (p: Parser) ensures forall pos: nat | pos <= |input| :: && p.requires(pos) && (p(pos).PSuccess? ==> pos < p(pos).pos <= |input|) @@ -1137,7 +1374,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { ) } - function {:opaque} {:vcs_split_on_every_assert} N(test: bool := true): (p: ParserBuilder) + opaque function {:vcs_split_on_every_assert} N(test: bool := true): (p: ParserBuilder) ensures p.size == |input| && forall pos: nat | pos <= |input| :: && p.apply.requires(pos) @@ -1146,19 +1383,19 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { B(Nat(test)) } - function {:opaque} Spaces?(): (r: Parser) + opaque function Spaces?(): (r: Parser) ensures forall pos: nat | pos <= |input| :: r.requires(pos) && (r(pos).PSuccess? ==> pos <= r(pos).pos <= |input|) { (pos: nat) requires pos <= |input| => - if pos < |input| && input[pos] in " \n\r\t" then + if pos < |input| && input[pos] in " \n\r\result" then PSuccess(pos+1, input[pos..pos+1]) else PFailure(Recoverable, "Spaces", pos) } - function {:opaque} SkipSpaces(p: Parser): (r: Parser) + opaque function SkipSpaces(p: Parser): (r: Parser) requires forall pos: nat | pos <= |input| :: p.requires(pos) && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) @@ -1187,11 +1424,11 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { LineContainingPos(pos, p + 1, lineNumber, if p <= pos then charNumber + 1 else charNumber, startLinePos) } - ghost predicate IsRegular(p: Parser) { + ghost predicate IsRegular(p: Parser) { IsRegular_(p, |input|) } - /*function {:opaque} Regex(s: string): (r: Parser) + /*opaque function Regex(s: string): (r: Parser) ensures forall pos: nat | pos <= |input| :: r.requires(pos) && (r(pos).PSuccess? ==> pos <= r(pos).pos <= |input|) @@ -1202,14 +1439,14 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { }*/ - function FeedbackToString(result: ParseResult): string + function FeedbackToString(result: ParseResult): string requires result.PFailure? { var (line, lineNumber, charNumber) := LineContainingPos(result.pos); result.message + " at position "+Printer.natToString(result.pos)+" line "+Printer.natToString(lineNumber)+", column "+Printer.natToString(charNumber)+":\n>"+ line+"\n"+seq(charNumber, i => ' ')+"^\n" } - method ReportError(p: ParseResult) + method ReportError(p: ParseResult) requires p.PFailure? { var (line, lineNumber, charNumber) := LineContainingPos(p.pos); @@ -1218,22 +1455,22 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { p.message, "\n"; return; } - function Debug(message: string, x: T): T { + function Debug(message: string, x: R): R { x } by method { print message, ":", x, "\n"; return x; } - function DebugParserFail(msg: string): Parser { + function DebugParserFail(msg: string): Parser { (p: nat) => ParseResult.PFailure(Recoverable, "", Debug( if p < |input| then msg + "'"+( if input[p] == '\r' then "\\r" else if input[p] == '\n' then "\\n" else input[p..p+1]) - + "' " + FeedbackToString(PFailure(Recoverable, "", p)) + "\n" else + + "' " + FeedbackToString(PFailure(Recoverable, "", p)) + "\n" else msg, p)) } - function {:opaque} DebugParser(msg: string, other: Parser): (p: Parser) + opaque function DebugParser(msg: string, other: Parser): (p: Parser) ensures IsRegular(other) ==> IsRegular(p) { var debugParser := DebugParserFail(msg+" (before)"); @@ -1247,4 +1484,5 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { } } type DigitNat = d: nat | 0 <= d <= 9 + */ } \ No newline at end of file From 9947ce77402c6646125c69ae8853c03d39821fd9 Mon Sep 17 00:00:00 2001 From: Mikael Mayer Date: Fri, 10 Nov 2023 12:02:21 -0600 Subject: [PATCH 03/22] Proof about parsers --- src/Parsers/parser.dfy | 75 ++++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 40 deletions(-) diff --git a/src/Parsers/parser.dfy b/src/Parsers/parser.dfy index 396bedb5..7c9680cd 100644 --- a/src/Parsers/parser.dfy +++ b/src/Parsers/parser.dfy @@ -7,7 +7,7 @@ abstract module Parsers { import Printer type Option = Wrappers.Option - type C(!new) // The character of the sequence being parsed + type C(!new, ==) // The character of the sequence being parsed datatype Either<+L, +R> = // Type to return when using the Or parser @@ -326,67 +326,62 @@ abstract module Parsers { reveal Map_(); } - /* - opaque function Repeat( - maxPos: nat, underlying: Parser ): Parser> { - (pos: nat) => Repeat_(maxPos, underlying, [], pos, 0, pos) + (input: seq) => Repeat_(underlying, [], input) } opaque function {:tailrecursion true} Repeat_( - maxPos: nat, underlying: Parser, acc: seq, - pos: nat, - deltaPos: nat, - ghost initPos: nat // it's the invariant + input: seq ): (p: ParseResult>) - requires deltaPos == pos - initPos - decreases if pos <= maxPos then 1 + maxPos - pos else 0 - // Alternative tail-recursive version of Repeat that does not return a parser, but directly the ParseResult + decreases |input| + // Repeat the underlying parser over the input until a recoverable failure happens + // and returns the accumulated results { - if pos > maxPos then PFailure(Error, "Cannot parse after the provide maximum position", pos, deltaPos) else - match underlying(pos) - case PSuccess(dpos1, head) => - if dpos1 == 0 then PSuccess(deltaPos, acc + [head]) else - Repeat_(maxPos, underlying, acc + [head], - pos + dpos1, deltaPos + dpos1, initPos) - case PFailure(Error, message, pos', deltaPos') => - PFailure(Error, message, pos', deltaPos') - case PFailure(Recoverable, message, pos', deltaPos') => - PSuccess(deltaPos, acc) + match underlying(input) + case PSuccess(result, remaining) => + if |remaining| >= |input| then PSuccess(acc + [result], input) else + Repeat_(underlying, acc + [result], remaining) + case PFailure(Error, message, remaining) => + PFailure(Error, message, remaining) + case PFailure(Recoverable, message, remaining) => + PSuccess(acc, input) } - predicate ParserStaysWithin(underlying: Parser, maxPos: nat) { - forall pos: nat | 0 <= pos <= maxPos :: - && (underlying(pos).PFailure? ==> underlying(pos).level == Recoverable) - && (underlying(pos).PSuccess? ==> - pos + underlying(pos).deltaPos <= maxPos) + predicate IsRemaining(input: seq, remaining: seq) + { + && |remaining| <= |input| + && input[|input|-|remaining|..] == remaining + } + + predicate ParserStaysWithin(underlying: Parser, input: seq) { + forall i | 0 <= i <= |input| :: + && var remaining := input[i..]; + && (assert IsRemaining(input, remaining); + underlying(remaining).PFailure? ==> underlying(remaining).level == Recoverable) + && (underlying(remaining).PSuccess? ==> + IsRemaining(remaining, underlying(remaining).remaining)) } lemma AboutRepeat_( - maxPos: nat, underlying: Parser, acc: seq, - pos: nat, - deltaPos: nat, - initPos: nat + input: seq ) - requires deltaPos == pos - initPos // If underlying never throws a fatal error, - // returns a delta position that stays within the limit of maxPos, - // then Repeat with alwyas return a success, provided it's called with an adequate pos - decreases if pos <= maxPos then 1 + maxPos - pos else 0 - requires ParserStaysWithin(underlying, maxPos) - ensures var p := Repeat_(maxPos, underlying, acc, pos, deltaPos, initPos); - && (pos <= maxPos ==> p.PSuccess?) - && (p.PFailure? ==> p.level == Error && pos > maxPos) + // returns a remaining that is a suffix of the input, + // then Repeat with always return a success + decreases |input| + requires ParserStaysWithin(underlying, input) + ensures Repeat_(underlying, acc, input).PSuccess? { reveal Repeat_(); + var _ := input[0..]; } - +/* predicate AboutRepeatIncreasesPosIfUnderlyingSucceedsAtLeastOnceEnsures( maxPos: nat, underlying: Parser, From daed7e1532314458fcd847d21a6a0dc1d3fa15c1 Mon Sep 17 00:00:00 2001 From: Mikael Mayer Date: Fri, 10 Nov 2023 16:05:20 -0600 Subject: [PATCH 04/22] Refactoring to make better usage of modules --- src/Parsers/parser.dfy | 723 ++++++++++++++++++++++------------------- 1 file changed, 389 insertions(+), 334 deletions(-) diff --git a/src/Parsers/parser.dfy b/src/Parsers/parser.dfy index 7c9680cd..e35ea001 100644 --- a/src/Parsers/parser.dfy +++ b/src/Parsers/parser.dfy @@ -61,11 +61,25 @@ abstract module Parsers { // A parser selector is a function that, given a name that exists, // returns a parser associated to this name - //type ParserMapper = (ParserSelector, nat) -> ParseResult - // A parser mapper is the building block of recursive parser. Given a parser selector and a position, - // it returns a parsing result. So it's like a parser, but it can also invoke other parsers. + predicate IsRemaining(input: seq, remaining: seq) + // Remaining is a suffix of the input + { + && |remaining| <= |input| + && input[|input|-|remaining|..] == remaining + } + + opaque ghost predicate NonCrashing(underlying: Parser) + // A parser is valid for an input if it never returns a fatal error + // and always returns a suffix of its input + { + forall input: seq :: + && (underlying(input).PFailure? ==> underlying(input).level == Recoverable) + && IsRemaining(input, underlying(input).remaining) + } - // Parser combinators. The following functions make it possible to create and compose parsers + // Parser combinators. + // The following functions make it possible to create and compose parsers + // All these combinators provide non-crashing parsers if their inputs are noncrashing opaque function Succeed_(result: R): (p: Parser) // A parser that does not consume any input and returns the given value @@ -74,15 +88,22 @@ abstract module Parsers { (input: seq) => PSuccess(result, input) } - lemma AboutSucceed_(result: R, input: seq) - ensures - var p := Succeed_(result); - && p(input).PSuccess? - && p(input).remaining == input - { reveal Succeed_(); } + lemma Succeed_NonCrashing(result: R) + ensures NonCrashing(Succeed_(result)) + { reveal NonCrashing(), Succeed_(); } + + lemma Succeed_NonCrashingAuto() + ensures forall result: R :: NonCrashing(Succeed_(result)) + { reveal NonCrashing(), Succeed_(); } opaque function Epsilon_(): (p: Parser<()>) - { Succeed_(()) } + { + Succeed_(()) + } + + lemma Epsilon_NonCrashing() + ensures NonCrashing(Epsilon_()) + { reveal NonCrashing(), Epsilon_(); Succeed_NonCrashing(()); } lemma AboutEpsilon_(input: seq) ensures @@ -100,32 +121,30 @@ abstract module Parsers { (input: seq) => PFailure(level, message, input) } - lemma AboutFail_(message: string, level: FailureLevel, input: seq) - ensures - var p := Fail_(message, level)(input); - && p.PFailure? - && p.message == message - && p.level == level - && p.remaining == input - { - reveal Fail_(); - } + lemma Fail_NonCrashing(message: string) + ensures NonCrashing(Fail_(message, Recoverable)) + { reveal Fail_(); reveal NonCrashing(); } - lemma AboutFail_2(message: string, input: seq) - ensures - var p := Fail_(message)(input); - && p.PFailure? - && p.message == message - && p.level == Recoverable - && p.remaining == input + lemma Fail_NonCrashingAuto() + ensures forall message :: NonCrashing(Fail_(message, Recoverable)) + { reveal Fail_(); reveal NonCrashing(); } + + opaque function Bind_( + left: Parser, + right: L -> Parser + ) : (p: Parser) { - reveal Fail_(); + (input: seq) + => + var (leftResult, remaining) :- left(input); + right(leftResult)(remaining) } - opaque function Bind_( + opaque function Bind__( left: Parser, right: (L, seq) -> Parser ) : (p: Parser) + // Useful for recursive binds { (input: seq) => @@ -133,23 +152,46 @@ abstract module Parsers { right(leftResult, remaining)(remaining) } - lemma AboutBind_( + ghost predicate BindRightNonCrashing(right: (L, seq) -> Parser) { + forall l: L, input: seq :: NonCrashing(right(l, input)) + } + + lemma Bind_NonCrashing( left: Parser, - right: (L, seq) -> Parser, - input: seq - ) - ensures - var p := Bind_(left, right)(input); - && var leftResult := left(input); - && !leftResult.IsFailure() - ==> var leftValues := left(input).Extract(); - && var rightResult := right(leftValues.0, leftValues.1)(leftValues.1); - && !rightResult.IsFailure() - ==> && !p.IsFailure() - && p.remaining == rightResult.remaining - && p.result == rightResult.result + right: (L, seq) -> Parser + ) + requires NonCrashing(left) + requires BindRightNonCrashing(right) + ensures NonCrashing(Bind__(left, right)) { - reveal Bind_(); + reveal Bind__(), NonCrashing(); + var p := Bind__(left, right); + forall input: seq ensures + && (p(input).PFailure? ==> p(input).level == Recoverable) + && IsRemaining(input, p(input).remaining) + { + + } + } + + ghost predicate Bind_NonCrashingRight(left: Parser) + requires NonCrashing(left) + { + forall right: (L, seq) -> Parser | BindRightNonCrashing(right) :: + NonCrashing(Bind__(left, right)) + } + + lemma Bind_NonCrashingAuto() + ensures forall left: Parser | NonCrashing(left) :: + Bind_NonCrashingRight(left) + { + forall left: Parser | NonCrashing(left), + right: (L, seq) -> Parser | BindRightNonCrashing(right) + ensures + NonCrashing(Bind__(left, right)) + { + Bind_NonCrashing(left, right); + } } opaque function Map_(underlying: Parser, mappingFunc: R -> U) @@ -165,33 +207,6 @@ abstract module Parsers { PSuccess(u, remaining) } - lemma AboutMap_(underlying: Parser, mappingFunc: R -> U, input: seq) - ensures var p := Map_(underlying, mappingFunc); - && (underlying(input).PSuccess? <==> p(input).PSuccess?) - && (p(input).PSuccess? ==> - && p(input).remaining == underlying(input).remaining - && p(input).result == mappingFunc(underlying(input).result)) - { - reveal Map_(); - reveal Bind_(); - reveal Succeed_(); - } - - function BindMapCallback(mappingFunc: R -> U): - (R, seq) -> Parser - { - (result: R, remaining: seq) => Succeed_(mappingFunc(result)) - } - - lemma AboutMap_Bind_(underlying: Parser, mappingFunc: R -> U, input: seq) - ensures Map_(underlying, mappingFunc)(input) - == Bind_(underlying, BindMapCallback(mappingFunc))(input) - { - reveal Map_(); - reveal Bind_(); - reveal Succeed_(); - } - opaque function Concat_( left: Parser, right: Parser @@ -205,41 +220,6 @@ abstract module Parsers { PSuccess((l, r), remaining2) } - lemma AboutConcat_( - left: Parser, - right: Parser, - input: seq) - ensures var p := Concat_(left, right); - && (p(input).PSuccess? ==> - && left(input).PSuccess? - && p(input).result.0 == left(input).result - && var input2 := left(input).remaining; - && right(input2).PSuccess? - && p(input).result.1 == right(input2).result - && p(input).remaining == right(input2).remaining) - { - reveal Concat_(); - } - - function BindConcatCallback(right: Parser): (L, seq) -> Parser<(L, R)> - { - (l: L, remaining: seq) => - Map_(right, (r: R) => (l, r)) - } - - lemma AboutConcat_Bind_( - left: Parser, - right: Parser, - input: seq) - ensures Concat_(left, right)(input) == Bind_(left, BindConcatCallback(right))(input) - { - reveal Concat_(); - reveal Bind_(); - reveal Succeed_(); - reveal Map_(); - } - - opaque function ConcatR_( left: Parser, right: Parser @@ -253,40 +233,6 @@ abstract module Parsers { PSuccess(r, remaining2) } - lemma AboutConcatR_( - left: Parser, - right: Parser, - input: seq) - ensures var p := ConcatR_(left, right); - && (p(input).PSuccess? ==> - && left(input).PSuccess? - && var input2 := left(input).remaining; - && right(input2).PSuccess? - && p(input).result == right(input2).result - && p(input).remaining == right(input2).remaining) - { - reveal ConcatR_(); - } - - function first(): ((L, R)) -> L { - (lr: (L, R)) => lr.0 - } - function second(): ((L, R)) -> R { - (lr: (L, R)) => lr.1 - } - lemma AboutConcat_ConcatR_( - left: Parser, - right: Parser, - input: seq) - ensures Map_(Concat_(left, right), second())(input) == ConcatR_(left, right)(input) - { - reveal Concat_(); - reveal Succeed_(); - reveal ConcatR_(); - reveal Map_(); - } - - opaque function ConcatL_( left: Parser, right: Parser @@ -300,32 +246,6 @@ abstract module Parsers { PSuccess(l, remaining2) } - lemma AboutConcatL_( - left: Parser, - right: Parser, - input: seq) - ensures var p := ConcatL_(left, right); - && (p(input).PSuccess? ==> - && left(input).PSuccess? - && var input2 := left(input).remaining; - && right(input2).PSuccess? - && p(input).result == left(input).result - && p(input).remaining == right(input2).remaining) - { - reveal ConcatL_(); - } - lemma AboutConcat_ConcatL_( - left: Parser, - right: Parser, - input: seq) - ensures Map_(Concat_(left, right), first())(input) == ConcatL_(left, right)(input) - { - reveal Concat_(); - reveal Succeed_(); - reveal ConcatL_(); - reveal Map_(); - } - opaque function Repeat( underlying: Parser ): Parser> { @@ -351,199 +271,81 @@ abstract module Parsers { PSuccess(acc, input) } - predicate IsRemaining(input: seq, remaining: seq) - { - && |remaining| <= |input| - && input[|input|-|remaining|..] == remaining - } - - predicate ParserStaysWithin(underlying: Parser, input: seq) { - forall i | 0 <= i <= |input| :: - && var remaining := input[i..]; - && (assert IsRemaining(input, remaining); - underlying(remaining).PFailure? ==> underlying(remaining).level == Recoverable) - && (underlying(remaining).PSuccess? ==> - IsRemaining(remaining, underlying(remaining).remaining)) - } - - lemma AboutRepeat_( - underlying: Parser, - acc: seq, - input: seq - ) - // If underlying never throws a fatal error, - // returns a remaining that is a suffix of the input, - // then Repeat with always return a success - decreases |input| - requires ParserStaysWithin(underlying, input) - ensures Repeat_(underlying, acc, input).PSuccess? - { - reveal Repeat_(); - var _ := input[0..]; - } -/* - predicate AboutRepeatIncreasesPosIfUnderlyingSucceedsAtLeastOnceEnsures( - maxPos: nat, - underlying: Parser, - acc: seq, - pos: nat, - deltaPos: nat, - initPos: nat - ) - requires deltaPos == pos - initPos - { - var result := Repeat_(maxPos, underlying, acc, pos, deltaPos, initPos); - && result.PSuccess? - && |acc| <= |result.result| - && pos <= initPos + result.deltaPos <= maxPos - && (underlying(pos).PSuccess? && 0 < underlying(pos).deltaPos - ==> - (|acc| < |result.result| && 0 < result.deltaPos)) - } - - lemma AboutRepeatIncreasesPosIfUnderlyingSucceedsAtLeastOnce( - maxPos: nat, - underlying: Parser, - acc: seq, - pos: nat, - deltaPos: nat, - initPos: nat - ) - decreases if pos <= maxPos then 1 + maxPos - pos else 0 - requires deltaPos == pos - initPos - requires pos <= maxPos - requires ParserStaysWithin(underlying, maxPos) - ensures - AboutRepeatIncreasesPosIfUnderlyingSucceedsAtLeastOnceEnsures - (maxPos, underlying, acc, pos, deltaPos, initPos) - { - var result := Repeat_(maxPos, underlying, acc, pos, deltaPos, initPos); - if pos > maxPos { - return; - } - reveal Repeat_(); - match underlying(pos) - case PSuccess(dpos1, head) => - if dpos1 == 0 { - } else { - AboutRepeatIncreasesPosIfUnderlyingSucceedsAtLeastOnce - (maxPos, underlying, acc + [head], - pos + dpos1, deltaPos + dpos1, initPos); - } - case PFailure(Error, message, pos', deltaPos') => - case PFailure(Recoverable, message, pos', deltaPos') => - } - - ghost predicate FixSpecInner(maxPos: nat, callback: Parser, u: nat) - // Specificaiton for Fixpoint. In other terms, verifies that the callback parser - // accepts all positions between u (exclusive) and size (inclusive) - { - && u <= maxPos - && forall u': nat | u < u' <= maxPos :: - && (callback(u').PSuccess? ==> u' + callback(u').deltaPos <= maxPos) - } - opaque function Fixpoint( - maxPos: nat, - underlying: (Parser, nat) -> ParseResult + underlying: Parser -> Parser ): (p: Parser) // Given a function that requires a parser and a position to return a parse result, // provide this function the Fixpoint() parser itself // so that it makes it possible to iteratively parse the result { - (pos: nat) => Fixpoint_(maxPos, underlying, pos) + (input: seq) => Fixpoint_(underlying, input) } opaque function Fixpoint_( - maxPos: nat, - underlying: (Parser, nat) -> ParseResult, - pos: nat + underlying: Parser -> Parser, + input: seq ): (p: ParseResult) // Given a function that combines a (recursive) parser and a position to obtain a parse result, // returns the parse result associated to recursively applying the function. // If partially applied on "underlying" and "size", it would returns the solution to the equation: // f = pos => underlying(f, pos) - decreases maxPos - pos + decreases |input| { var callback: Parser := - (pos': nat) => - if pos < pos' <= maxPos then - Fixpoint_(maxPos, underlying, pos') - else if pos' <= pos then - PFailure(Recoverable, "No progress", pos', 0) + (remaining: seq) => + if |remaining| < |input| then + Fixpoint_(underlying, remaining) + else if |remaining| == |input| then + PFailure(Recoverable, "No progress", remaining) else - PFailure(Recoverable, "Parsing extended out of selected size", pos', 0); - underlying(callback, pos) - } - - predicate AboutFixpoint_Ensures( - maxPos: nat, - underlying: (Parser, nat) -> ParseResult, - pos: nat) - { - var p := Fixpoint_(maxPos, underlying, pos); - p.PSuccess? ==> pos + p.deltaPos <= maxPos - } - lemma AboutFixpoint_( - maxPos: nat, - underlying: (Parser, nat) -> ParseResult, - pos: nat) - requires - forall callback: Parser, u: nat - | underlying(callback, u).PSuccess? - :: u + underlying(callback, u).deltaPos <= maxPos - ensures AboutFixpoint_Ensures(maxPos, underlying, pos) - { - reveal Fixpoint_(); + PFailure(Error, "Fixpoint called with an increasing remaining sequence", remaining); + underlying(callback)(input) } - opaque function FixpointMap( + /*opaque function FixpointMap( maxPos: nat, underlying: map Option>, nat) -> ParseResult>, fun: string): Parser { (pos: nat) => FixpointMap_(maxPos, underlying, fun, pos) - } - + }*/ + datatype RecursiveDef = RecursiveDef( + order: nat, + definition: (string -> Option>) -> Parser + ) opaque function FixpointMap_( - maxPos: nat, - underlying: map Option>, nat) -> ParseResult>, + underlying: map>, fun: string, - pos: nat + input: seq ): (p: ParseResult) // Given a function that combines a (recursive) parser selector and a position to obtain a parse result, // returns the parse result associated to recursively applying the function. // If partially applied on "underlying" and "fun", it would return the solution f<"fun"> to the equations: // f = pos => underlying[fun](f, pos) - decreases maxPos - pos, |fun| + decreases |input|, if fun in underlying then underlying[fun].order else 0 { - if fun !in underlying then PFailure(Error, "Parser '"+fun+"' not found", pos, 0) else + if fun !in underlying then PFailure(Error, "Parser '"+fun+"' not found", input) else + var RecursiveDef(orderFun, definitionFun) := underlying[fun]; var callback: string -> Option> := (fun': string) => if fun' !in underlying.Keys then - None + Option.None else - Some( - (pos': nat) => - if pos < pos' <= maxPos || (pos' == pos && |fun'| < |fun|) then - FixpointMap_(maxPos, underlying, fun', pos') - else if pos' == pos then - PFailure(Recoverable, "Non progressing recursive call requires that '"+fun'+"' be shorter than '"+fun+"'", pos', 0) + var RecursiveDef(orderFun', definitionFun') := underlying[fun']; + Option.Some( + (remaining: seq) => + if |remaining| < |input| || (|remaining| == |input| && orderFun' < orderFun) then + FixpointMap_(underlying, fun', remaining) + else if |remaining| == |input| then + PFailure(Recoverable, "Non progressing recursive call requires that order of '" + +fun'+"' ("+Printer.natToString(orderFun')+") is lower than the order of '"+fun+"' ("+Printer.natToString(orderFun)+")", remaining) else - PFailure(Error, "Parser did something unexpected, jump to position " + Printer.natToString(pos'), pos', 0) + PFailure(Error, "Parser did not return a suffix of the input", remaining) ); - underlying[fun](callback, pos) - } - predicate AboutFixpointMap_Ensures( - maxPos: nat, - underlying: map Option>, nat) -> ParseResult>, - fun: string, - pos: nat) { - var p := FixpointMap_(maxPos, underlying, fun, pos); - && (p.PSuccess? ==> pos + p.deltaPos <= maxPos) - // Also, need to prove that if underlying never fails, then fixpoint never fails as well + definitionFun(callback)(input) } - + + /* ghost predicate FixMapSpecInnerInner( fun: string, fun': string, functions: set, maxPos: nat, callback: string -> Option>, u: nat) { @@ -696,7 +498,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserTheorems { ensures forall pos: nat | MapSpec(size, underlying, mappingFunc, pos) :: p.requires(pos) { - var p := Bind_(size, underlying, (result: R, pos': nat) requires mappingFunc.requires(result) => Succeed_(size, mappingFunc(result))); + var p := Bind__(size, underlying, (result: R, pos': nat) requires mappingFunc.requires(result) => Succeed_(size, mappingFunc(result))); assert forall pos: nat | MapSpec(size, underlying, mappingFunc, pos) :: p.requires(pos) by { forall pos: nat | MapSpec(size, underlying, mappingFunc, pos) @@ -718,7 +520,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserTheorems { { reveal Map_(); reveal Map__(); - reveal Bind_(); + reveal Bind__(); reveal Succeed_(); } @@ -728,18 +530,18 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserTheorems { ensures forall pos: nat | ConcatSpec_(size, left, right, pos) :: p.requires(pos) { - Bind_(size, left, (result: R, pos': nat) requires right.requires(pos') => - Bind_(size, right, (u: U, pos'': nat) => Succeed_(size, (result, u)))) + Bind__(size, left, (result: R, pos': nat) requires right.requires(pos') => + Bind__(size, right, (u: U, pos'': nat) => Succeed_(size, (result, u)))) } lemma Concat_Concat2(size: nat, left: Parser, right: Parser, pos: nat) requires ConcatSpec_(size, left, right, pos) ensures BindSpec(size, left, (result: R, pos': nat) requires right.requires(pos') => - Bind_(size, right, (u: U, pos'': nat) => Succeed_(size, (result, u))), pos) + Bind__(size, right, (u: U, pos'': nat) => Succeed_(size, (result, u))), pos) // TODO: Bug to report. Concat_() should not be needed ensures Concat_(size, left, right)(pos) == Concat__(size, left, right)(pos) { - reveal Bind_(); + reveal Bind__(); reveal Concat_(); reveal Concat__(); reveal Succeed_(); @@ -819,7 +621,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserBuilders { && apply(pos).PSuccess? && apply(pos).pos <= p.apply(pos).pos <= size) { - B_(size, Bind_(size, apply, (result: R, pos': nat) requires other.requires(result, pos') => other(result, pos').apply)) + B_(size, Bind__(size, apply, (result: R, pos': nat) requires other.requires(result, pos') => other(result, pos').apply)) } opaque function Repeat(init: R, combine: (R, R) -> R): (p: ParserBuilder) @@ -862,17 +664,17 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserBuilders { } datatype FixMapParserBuilder = FixMapParserBuilder(ghost size: nat, ghost functions: set, underlying: map> := map[]) { - static function Empty(ghost size: nat, ghost functions: set): (b: FixMapParserBuilder) ensures b.Valid() { + static function Empty(ghost size: nat, ghost functions: set): (b: FixMapParserBuilder) ensures b.NonCrashing() { FixMapParserBuilder(size, functions, map[]) } - ghost predicate Valid() { + ghost predicate NonCrashing() { forall fun <- underlying :: FixpointMapSpecOnce(fun, underlying[fun], functions, size) } opaque function Add(name: string, mapper: ParserMapper): (f: FixMapParserBuilder) - requires Valid() + requires NonCrashing() requires name !in underlying requires FixpointMapSpecOnce(name, mapper, functions, size) - ensures f.Valid() + ensures f.NonCrashing() ensures f.functions == functions ensures f.size == size ensures name in f.underlying @@ -940,7 +742,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { && left(pos).pos <= p(pos).pos <= |input| && p(pos) == right(left(pos).result, left(pos).pos)(left(pos).pos)) { - Bind_(|input|, left, right) + Bind__(|input|, left, right) } opaque function Epsilon(pos: nat): (pr: ParseResult<()>) @@ -1294,7 +1096,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { ) } opaque function FixMapBuilder(ghost functions: set): (r: FixMapParserBuilder) - ensures r.Valid() + ensures r.NonCrashing() ensures |r.underlying.Keys| == 0 ensures r.functions == functions ensures r.size == |input| @@ -1480,4 +1282,257 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { } type DigitNat = d: nat | 0 <= d <= 9 */ +} + +abstract module ParserTests refines Parsers { + lemma AboutSucceed_(result: R, input: seq) + ensures + var p := Succeed_(result); + && p(input).PSuccess? + && p(input).remaining == input + { reveal Succeed_(); } + + lemma AboutFail_(message: string, level: FailureLevel, input: seq) + ensures + var p := Fail_(message, level)(input); + && p.PFailure? + && p.message == message + && p.level == level + && p.remaining == input + { + reveal Fail_(); + } + + lemma AboutFail_2(message: string, input: seq) + ensures + var p := Fail_(message)(input); + && p.PFailure? + && p.message == message + && p.level == Recoverable + && p.remaining == input + { + reveal Fail_(); + } + + lemma AboutBind_( + left: Parser, + right: (L, seq) -> Parser, + input: seq + ) + ensures + var p := Bind__(left, right)(input); + && var leftResult := left(input); + && !leftResult.IsFailure() + ==> var leftValues := left(input).Extract(); + && var rightResult := right(leftValues.0, leftValues.1)(leftValues.1); + && !rightResult.IsFailure() + ==> && !p.IsFailure() + && p.remaining == rightResult.remaining + && p.result == rightResult.result + { + reveal Bind__(); + } + + lemma AboutMap_(underlying: Parser, mappingFunc: R -> U, input: seq) + ensures var p := Map_(underlying, mappingFunc); + && (underlying(input).PSuccess? <==> p(input).PSuccess?) + && (p(input).PSuccess? ==> + && p(input).remaining == underlying(input).remaining + && p(input).result == mappingFunc(underlying(input).result)) + { + reveal Map_(); + reveal Bind__(); + reveal Succeed_(); + } + + function BindMapCallback(mappingFunc: R -> U): + (R, seq) -> Parser + { + (result: R, remaining: seq) => Succeed_(mappingFunc(result)) + } + + lemma AboutMap_Bind_(underlying: Parser, mappingFunc: R -> U, input: seq) + ensures Map_(underlying, mappingFunc)(input) + == Bind__(underlying, BindMapCallback(mappingFunc))(input) + { + reveal Map_(); + reveal Bind__(); + reveal Succeed_(); + } + + lemma AboutConcat_( + left: Parser, + right: Parser, + input: seq) + ensures var p := Concat_(left, right); + && (p(input).PSuccess? ==> + && left(input).PSuccess? + && p(input).result.0 == left(input).result + && var input2 := left(input).remaining; + && right(input2).PSuccess? + && p(input).result.1 == right(input2).result + && p(input).remaining == right(input2).remaining) + { + reveal Concat_(); + } + + function BindConcatCallback(right: Parser): (L, seq) -> Parser<(L, R)> + { + (l: L, remaining: seq) => + Map_(right, (r: R) => (l, r)) + } + + lemma AboutConcat_Bind_( + left: Parser, + right: Parser, + input: seq) + ensures Concat_(left, right)(input) == Bind__(left, BindConcatCallback(right))(input) + { + reveal Concat_(); + reveal Bind__(); + reveal Succeed_(); + reveal Map_(); + } + + lemma AboutConcatR_( + left: Parser, + right: Parser, + input: seq) + ensures var p := ConcatR_(left, right); + && (p(input).PSuccess? ==> + && left(input).PSuccess? + && var input2 := left(input).remaining; + && right(input2).PSuccess? + && p(input).result == right(input2).result + && p(input).remaining == right(input2).remaining) + { + reveal ConcatR_(); + } + + function first(): ((L, R)) -> L { + (lr: (L, R)) => lr.0 + } + function second(): ((L, R)) -> R { + (lr: (L, R)) => lr.1 + } + lemma AboutConcat_ConcatR_( + left: Parser, + right: Parser, + input: seq) + ensures Map_(Concat_(left, right), second())(input) == ConcatR_(left, right)(input) + { + reveal Concat_(); + reveal Succeed_(); + reveal ConcatR_(); + reveal Map_(); + } + + + lemma AboutConcatL_( + left: Parser, + right: Parser, + input: seq) + ensures var p := ConcatL_(left, right); + && (p(input).PSuccess? ==> + && left(input).PSuccess? + && var input2 := left(input).remaining; + && right(input2).PSuccess? + && p(input).result == left(input).result + && p(input).remaining == right(input2).remaining) + { + reveal ConcatL_(); + } + lemma AboutConcat_ConcatL_( + left: Parser, + right: Parser, + input: seq) + ensures Map_(Concat_(left, right), first())(input) == ConcatL_(left, right)(input) + { + reveal Concat_(); + reveal Succeed_(); + reveal ConcatL_(); + reveal Map_(); + } + + lemma AboutRepeat_( + underlying: Parser, + acc: seq, + input: seq + ) + // If underlying never throws a fatal error, + // returns a remaining that is a suffix of the input, + // then Repeat with always return a success + decreases |input| + requires NonCrashing(underlying) + ensures Repeat_(underlying, acc, input).PSuccess? + { + reveal Repeat_(), NonCrashing(); + assert IsRemaining(input, input[0..]); + } + + predicate AboutRepeatIncreasesPosIfUnderlyingSucceedsAtLeastOnceEnsures( + underlying: Parser, + acc: seq, + input: seq + ) + { + var result := Repeat_(underlying, acc, input); + && result.PSuccess? + && |acc| <= |result.result| + && (underlying(input).PSuccess? && |underlying(input).remaining| < |input| + ==> + (|acc| < |result.result| && |result.remaining| < |input|)) + } + + lemma AboutRepeatIncreasesPosIfUnderlyingSucceedsAtLeastOnce( + underlying: Parser, + acc: seq, + input: seq + ) + decreases |input| + requires NonCrashing(underlying) + ensures + AboutRepeatIncreasesPosIfUnderlyingSucceedsAtLeastOnceEnsures + (underlying, acc, input) + { + reveal Repeat_(), NonCrashing(); + var _ := input[0..]; + match underlying(input) + case PSuccess(result, remaining) => + if |remaining| < |input| { + AboutRepeatIncreasesPosIfUnderlyingSucceedsAtLeastOnce(underlying, acc + [result], remaining); + } + case _ => + } + + predicate AboutFixpoint_Ensures( + underlying: Parser -> Parser, + input: seq) + { + var p := Fixpoint_(underlying, input); + p.PSuccess? ==> IsRemaining(input, p.remaining) + } + + lemma AboutFixpoint_( + underlying: Parser -> Parser, + input: seq) + requires + forall callback: Parser, u: seq + | underlying(callback)(u).PSuccess? + :: IsRemaining(input, underlying(callback)(input).remaining) + ensures AboutFixpoint_Ensures(underlying, input) + { + reveal Fixpoint_(); + } + + + predicate AboutFixpointMap_Ensures( + underlying: map>, + fun: string, + input: seq + ) { + var p := FixpointMap_(underlying, fun, input); + && (p.PSuccess? ==> IsRemaining(input, p.remaining)) + } + } \ No newline at end of file From 2ca16b4dca529429585904b3eb44b4cace074918 Mon Sep 17 00:00:00 2001 From: Mikael Mayer Date: Mon, 13 Nov 2023 07:57:58 -0600 Subject: [PATCH 05/22] Support for conjunctive parsers --- src/Parsers/parser.dfy | 282 +++++++++++++++++++++++++++++------------ 1 file changed, 202 insertions(+), 80 deletions(-) diff --git a/src/Parsers/parser.dfy b/src/Parsers/parser.dfy index e35ea001..2d25087e 100644 --- a/src/Parsers/parser.dfy +++ b/src/Parsers/parser.dfy @@ -15,11 +15,11 @@ abstract module Parsers { datatype FailureLevel = - // PFailure level for parse results. An Error will be propagated to the top + // PFailure level for parse results. A Fatal error will be propagated to the top // while a Recoverable can be caught by a disjunctive pattern. // For example, if the parser Const?() fails, then it returns a Recoverable, // but the parser Const() will return an error. - Error | Recoverable + Fatal | Recoverable datatype ParseResult<+R> = // ParseResult is a failure-compatible type @@ -32,6 +32,16 @@ abstract module Parsers { PFailure? } + predicate IsFatalFailure() { + PFailure? && level == Fatal + } + + predicate IsFatal() + requires IsFailure() + { + level == Fatal + } + function PropagateFailure(): ParseResult requires IsFailure() { @@ -51,7 +61,15 @@ abstract module Parsers { case PFailure(level, message, remaining) => PFailure(level, message, remaining) } + + function IfRecoverableFailureNoProgress(input: seq, right: Parser): ParseResult { + if PFailure? && level == Recoverable && !Committed(input, remaining) then + right(input) + else + this + } } + type Parser<+R> = seq -> ParseResult // A parser is a total function from a position to a parse result // Because it returns a delta pos, it cannot return a position negative from the origing @@ -61,6 +79,10 @@ abstract module Parsers { // A parser selector is a function that, given a name that exists, // returns a parser associated to this name + predicate Committed(input: seq, remaining: seq) { + input != remaining + } + predicate IsRemaining(input: seq, remaining: seq) // Remaining is a suffix of the input { @@ -96,43 +118,47 @@ abstract module Parsers { ensures forall result: R :: NonCrashing(Succeed_(result)) { reveal NonCrashing(), Succeed_(); } - opaque function Epsilon_(): (p: Parser<()>) + opaque function Epsilon(): (p: Parser<()>) { Succeed_(()) } lemma Epsilon_NonCrashing() - ensures NonCrashing(Epsilon_()) - { reveal NonCrashing(), Epsilon_(); Succeed_NonCrashing(()); } + ensures NonCrashing(Epsilon()) + { reveal NonCrashing(), Epsilon(); Succeed_NonCrashing(()); } lemma AboutEpsilon_(input: seq) ensures - var p := Epsilon_(); + var p := Epsilon(); && p(input).PSuccess? && p(input).remaining == input { - reveal Epsilon_(); + reveal Epsilon(); reveal Succeed_(); } - opaque function Fail_(message: string, level: FailureLevel := Recoverable): Parser + opaque function Fail(message: string, level: FailureLevel := Recoverable): Parser // A parser that does not consume any input and returns the given failure { (input: seq) => PFailure(level, message, input) } lemma Fail_NonCrashing(message: string) - ensures NonCrashing(Fail_(message, Recoverable)) - { reveal Fail_(); reveal NonCrashing(); } + ensures NonCrashing(Fail(message, Recoverable)) + { reveal Fail(); reveal NonCrashing(); } lemma Fail_NonCrashingAuto() - ensures forall message :: NonCrashing(Fail_(message, Recoverable)) - { reveal Fail_(); reveal NonCrashing(); } + ensures forall message :: NonCrashing(Fail(message, Recoverable)) + { reveal Fail(); reveal NonCrashing(); } - opaque function Bind_( + opaque function Bind( left: Parser, right: L -> Parser ) : (p: Parser) + // Fails if the left parser fails. + // If the left parser succeeds, provides its result and its remaining + // to the right parser generator and returns its result applied to the remaining + // For a more general version, look at BindSucceeds and Bind___ { (input: seq) => @@ -140,11 +166,14 @@ abstract module Parsers { right(leftResult)(remaining) } - opaque function Bind__( + opaque function BindSucceeds( left: Parser, right: (L, seq) -> Parser ) : (p: Parser) - // Useful for recursive binds + // Fails if the left parser fails. + // If the left parser succeeds, provides its result and its remaining + // to the right parser generator and returns its result applied to the remaining + // For a more general version, look at BindResult { (input: seq) => @@ -152,6 +181,18 @@ abstract module Parsers { right(leftResult, remaining)(remaining) } + opaque function BindResult( + left: Parser, + right: (ParseResult, seq) -> Parser + ) : (p: Parser) + // Given a left parser and a parser generator based on the output + // of the left parser, returns the result of the second parser + { + (input: seq) + => + right(left(input), input)(input) + } + ghost predicate BindRightNonCrashing(right: (L, seq) -> Parser) { forall l: L, input: seq :: NonCrashing(right(l, input)) } @@ -162,10 +203,10 @@ abstract module Parsers { ) requires NonCrashing(left) requires BindRightNonCrashing(right) - ensures NonCrashing(Bind__(left, right)) + ensures NonCrashing(BindSucceeds(left, right)) { - reveal Bind__(), NonCrashing(); - var p := Bind__(left, right); + reveal BindSucceeds(), NonCrashing(); + var p := BindSucceeds(left, right); forall input: seq ensures && (p(input).PFailure? ==> p(input).level == Recoverable) && IsRemaining(input, p(input).remaining) @@ -178,7 +219,7 @@ abstract module Parsers { requires NonCrashing(left) { forall right: (L, seq) -> Parser | BindRightNonCrashing(right) :: - NonCrashing(Bind__(left, right)) + NonCrashing(BindSucceeds(left, right)) } lemma Bind_NonCrashingAuto() @@ -188,13 +229,13 @@ abstract module Parsers { forall left: Parser | NonCrashing(left), right: (L, seq) -> Parser | BindRightNonCrashing(right) ensures - NonCrashing(Bind__(left, right)) + NonCrashing(BindSucceeds(left, right)) { Bind_NonCrashing(left, right); } } - opaque function Map_(underlying: Parser, mappingFunc: R -> U) + opaque function Map(underlying: Parser, mappingFunc: R -> U) : (p: Parser) // A parser combinator that makes it possible to transform the result of a parser in another one // The mapping function can be partial @@ -207,6 +248,70 @@ abstract module Parsers { PSuccess(u, remaining) } + opaque function Not(underlying: Parser): Parser<()> + // Returns a parser that succeeds if the underlying + { + (input: seq) => + var l := underlying(input); + if l.IsFailure() then + if l.IsFatal() then l.PropagateFailure() + else PSuccess((), input) + else PFailure(Recoverable, "Not failed", input) + } + + opaque function And( + left: Parser, + right: Parser + ) : (p: Parser<(L, R)>) + // Make the two parsers parse the same string, and return + // a pair of the two results, with the remaining of the right + { + (input: seq) => + var (l, remainingLeft) :- left(input); + var (r, remainingRight) :- right(input); + PSuccess((l, r), remainingRight) + } + + opaque function Or( + left: Parser, + right: Parser + ) : (p: Parser>) + // left parses the string. If left succeeds, returns + // if left fails, two cases + // - If the error is recoverable and the parser did not consume input, + // then return what right returns + // - Otherwise return the first error + { + (input: seq) => + var p := Map(left, l => Left(l))(input); + p.IfRecoverableFailureNoProgress(input, + Map(right, r => Right(r))) + } + + opaque function Lookahead(underlying: Parser): (p: Parser) + // If the underlying parser succeeds, returns its result without committing the input + // if the underlying parser fails, + // - If the failure is fatal, returns it as-it + // - If the failure is recoverable, returns it without comitting the input + { + (input: seq) => + var p := underlying(input); + if p.IsFatalFailure() then p + else p.(remaining := input) + } + + opaque function If( + condition: Parser, + succeed: Parser + ) : (p: Parser) + // If the condifition fails, returns a non-committing failure + // Suitable to use in Or parsers + { + (input: seq) => + var (p, remaining) :- Lookahead(condition)(input); + succeed(input) + } + opaque function Concat_( left: Parser, right: Parser @@ -220,6 +325,23 @@ abstract module Parsers { PSuccess((l, r), remaining2) } + opaque function ConcatLeftNonCommitting( + left: Parser, + right: Parser + ) : (p: Parser<(L, R)>) + // Makes it possible to concatenate two consecutive parsers and return the pair of the results + { + (input: seq) + => + var lResult := left(input); + if lResult.PFailure? && lResult.level == Recoverable then + PFailure(Recoverable, lResult.message, input) + else + var (l, remaining) :- left(input); + var (r, remaining2) :- right(remaining); + PSuccess((l, r), remaining2) + } + opaque function ConcatR_( left: Parser, right: Parser @@ -265,8 +387,8 @@ abstract module Parsers { case PSuccess(result, remaining) => if |remaining| >= |input| then PSuccess(acc + [result], input) else Repeat_(underlying, acc + [result], remaining) - case PFailure(Error, message, remaining) => - PFailure(Error, message, remaining) + case PFailure(Fatal, message, remaining) => + PFailure(Fatal, message, remaining) case PFailure(Recoverable, message, remaining) => PSuccess(acc, input) } @@ -298,19 +420,19 @@ abstract module Parsers { else if |remaining| == |input| then PFailure(Recoverable, "No progress", remaining) else - PFailure(Error, "Fixpoint called with an increasing remaining sequence", remaining); + PFailure(Fatal, "Fixpoint called with an increasing remaining sequence", remaining); underlying(callback)(input) } /*opaque function FixpointMap( maxPos: nat, - underlying: map Option>, nat) -> ParseResult>, + underlying: map, nat) -> ParseResult>, fun: string): Parser { (pos: nat) => FixpointMap_(maxPos, underlying, fun, pos) }*/ datatype RecursiveDef = RecursiveDef( order: nat, - definition: (string -> Option>) -> Parser + definition: (ParserSelector) -> Parser ) opaque function FixpointMap_( underlying: map>, @@ -323,9 +445,9 @@ abstract module Parsers { // f = pos => underlying[fun](f, pos) decreases |input|, if fun in underlying then underlying[fun].order else 0 { - if fun !in underlying then PFailure(Error, "Parser '"+fun+"' not found", input) else + if fun !in underlying then PFailure(Fatal, "Parser '"+fun+"' not found", input) else var RecursiveDef(orderFun, definitionFun) := underlying[fun]; - var callback: string -> Option> + var callback: ParserSelector := (fun': string) => if fun' !in underlying.Keys then @@ -340,14 +462,14 @@ abstract module Parsers { PFailure(Recoverable, "Non progressing recursive call requires that order of '" +fun'+"' ("+Printer.natToString(orderFun')+") is lower than the order of '"+fun+"' ("+Printer.natToString(orderFun)+")", remaining) else - PFailure(Error, "Parser did not return a suffix of the input", remaining) + PFailure(Fatal, "Parser did not return a suffix of the input", remaining) ); definitionFun(callback)(input) } /* ghost predicate FixMapSpecInnerInner( - fun: string, fun': string, functions: set, maxPos: nat, callback: string -> Option>, u: nat) + fun: string, fun': string, functions: set, maxPos: nat, callback: ParserSelector, u: nat) { forall u': nat | u < u' <= maxPos || (u == u' && |fun'| < |fun|) :: && callback(fun').Some? @@ -355,7 +477,7 @@ abstract module Parsers { && (x.PSuccess? ==> u' + x.deltaPos <= maxPos) } - ghost predicate FixMapSpecInner(fun: string, functions: set, maxPos: nat, callback: string -> Option>, u: nat) + ghost predicate FixMapSpecInner(fun: string, functions: set, maxPos: nat, callback: ParserSelector, u: nat) // Specification for FixpointMap. // Ensures that, for any other function, if this function is in the set of admissible `functions`, // then callback should not only accept it, but then accept any position at a second argument if @@ -377,7 +499,7 @@ abstract module Parsers { lemma AboutFixpointMap_( maxPos: nat, - underlying: map Option>, nat) -> ParseResult>, + underlying: map, nat) -> ParseResult>, fun: string, pos: nat ) @@ -391,7 +513,7 @@ abstract module Parsers { reveal FixpointMap_(); var p := FixpointMap_(maxPos, underlying, fun, pos); - var callback: string -> Option> + var callback: ParserSelector := (fun': string) => if fun' !in underlying.Keys then @@ -404,7 +526,7 @@ abstract module Parsers { else if pos' == pos then PFailure(Recoverable, "Non progressing recursive call requires that '"+fun'+"' be shorter than '"+fun+"'", pos', 0) else - PFailure(Error, "Parser did something unexpected, jump to position " + Printer.natToString(pos'), pos', 0) + PFailure(Fatal, "Parser did something unexpected, jump to position " + Printer.natToString(pos'), pos', 0) ); if fun in underlying { assert {:only} FixMapSpecInner(fun, underlying.Keys, maxPos, callback, pos) by { @@ -455,7 +577,7 @@ abstract module Parsers { => match left(pos) case PFailure(Recoverable, message, pos') => right(pos) - case PFailure(Error, message, pos') => PFailure(Error, message, pos') + case PFailure(Fatal, message, pos') => PFailure(Fatal, message, pos') case PSuccess(pos, l) => PSuccess(pos, l) } @@ -483,7 +605,7 @@ abstract module Parsers { => match left(pos) case PFailure(Recoverable, message, pos') => right(pos).MapResult(r => Right(r)) - case PFailure(Error, message, pos') => PFailure(Error, message, pos') + case PFailure(Fatal, message, pos') => PFailure(Fatal, message, pos') case PSuccess(pos, l) => PSuccess(pos, Left(l)) } } @@ -498,7 +620,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserTheorems { ensures forall pos: nat | MapSpec(size, underlying, mappingFunc, pos) :: p.requires(pos) { - var p := Bind__(size, underlying, (result: R, pos': nat) requires mappingFunc.requires(result) => Succeed_(size, mappingFunc(result))); + var p := BindSucceeds(size, underlying, (result: R, pos': nat) requires mappingFunc.requires(result) => Succeed_(size, mappingFunc(result))); assert forall pos: nat | MapSpec(size, underlying, mappingFunc, pos) :: p.requires(pos) by { forall pos: nat | MapSpec(size, underlying, mappingFunc, pos) @@ -516,11 +638,11 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserTheorems { lemma Map_Map2(size: nat, underlying: Parser, mappingFunc: R --> U, pos: nat) requires MapSpec(size, underlying, mappingFunc, pos) ensures - && Map__(size, underlying, mappingFunc)(pos) == Map_(size, underlying, mappingFunc)(pos) + && Map__(size, underlying, mappingFunc)(pos) == Map(size, underlying, mappingFunc)(pos) { - reveal Map_(); + reveal Map(); reveal Map__(); - reveal Bind__(); + reveal BindSucceeds(); reveal Succeed_(); } @@ -530,18 +652,18 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserTheorems { ensures forall pos: nat | ConcatSpec_(size, left, right, pos) :: p.requires(pos) { - Bind__(size, left, (result: R, pos': nat) requires right.requires(pos') => - Bind__(size, right, (u: U, pos'': nat) => Succeed_(size, (result, u)))) + BindSucceeds(size, left, (result: R, pos': nat) requires right.requires(pos') => + BindSucceeds(size, right, (u: U, pos'': nat) => Succeed_(size, (result, u)))) } lemma Concat_Concat2(size: nat, left: Parser, right: Parser, pos: nat) requires ConcatSpec_(size, left, right, pos) ensures BindSpec(size, left, (result: R, pos': nat) requires right.requires(pos') => - Bind__(size, right, (u: U, pos'': nat) => Succeed_(size, (result, u))), pos) + BindSucceeds(size, right, (u: U, pos'': nat) => Succeed_(size, (result, u))), pos) // TODO: Bug to report. Concat_() should not be needed ensures Concat_(size, left, right)(pos) == Concat__(size, left, right)(pos) { - reveal Bind__(); + reveal BindSucceeds(); reveal Concat_(); reveal Concat__(); reveal Succeed_(); @@ -598,7 +720,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserBuilders { :: p.apply.requires(pos) && (p.apply(pos).PSuccess? ==> pos <= p.apply(pos).pos <= size) { - B_(size, Map_(size, apply, mappingFunc)) + B_(size, Map(size, apply, mappingFunc)) } opaque function O(other: ParserBuilder): (p: ParserBuilder) requires size == other.size @@ -621,7 +743,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserBuilders { && apply(pos).PSuccess? && apply(pos).pos <= p.apply(pos).pos <= size) { - B_(size, Bind__(size, apply, (result: R, pos': nat) requires other.requires(result, pos') => other(result, pos').apply)) + B_(size, BindSucceeds(size, apply, (result: R, pos': nat) requires other.requires(result, pos') => other(result, pos').apply)) } opaque function Repeat(init: R, combine: (R, R) -> R): (p: ParserBuilder) @@ -703,20 +825,20 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { // ConcatR(l, r) if l and r succeed consecutively, returns the value of r // Or(l, r) Returns the first of l or r which succeeds // EitherP(l, r) Returns the first of l or r which succeeds, wrapped in Either type - // Char('c') fails with Error if 'c' is not at the given position. + // Char('c') fails with Fatal if 'c' is not at the given position. // Char?('c') fails with Recoverable if 'c' is not at the given position. - // Const("string") fails with Error if "string" is not at the given position. + // Const("string") fails with Fatal if "string" is not at the given position. // Const?("string") fails with Recoverable if "string" is not at the given position. // Rep(parser) repeats the parser as much as possible and returns the sequence of results // Fix((result, pos) => parseResult) returns a parser that recursively applies the provided function when needed // FixMap((result, pos) => parseResult) Same as fix but can provide a mapping from string to functions instead of a single function - // EOS Succeeds if we reached the end of the string, fails with Error otherwise + // EOS Succeeds if we reached the end of the string, fails with Fatal otherwise // FirstOf([p*]) returns the first parser which succeeds // Maybe(p) If p succeeds, returns Some(p.result), otherwise returns None if p fails with Recoverable // DebugParser(msg, p) Prints the given message and pass through p // // Engine defines the following parser builders (please avoid them because they are not performant!) - // C("string") fails with Error if "string" is not at the given position. + // C("string") fails with Fatal if "string" is not at the given position. // C?("string") fails with Recoverable if "string" is not at the given position. // B(p) wraps a regular parser // R(v) A parser builder that returns the given value @@ -742,14 +864,14 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { && left(pos).pos <= p(pos).pos <= |input| && p(pos) == right(left(pos).result, left(pos).pos)(left(pos).pos)) { - Bind__(|input|, left, right) + BindSucceeds(|input|, left, right) } opaque function Epsilon(pos: nat): (pr: ParseResult<()>) requires pos <= |input| ensures pr.PSuccess? && pr.pos == pos { - Epsilon_(|input|)(pos) + Epsilon(|input|)(pos) } opaque function Map(underlying: Parser, mappingFunc: R --> U) @@ -760,7 +882,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { :: && p.requires(pos) && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) { - Map_(|input|, underlying, mappingFunc) + Map(|input|, underlying, mappingFunc) } ghost predicate MapFailureSpec(pos: nat, underlying: Parser) { @@ -860,7 +982,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { { (pos: nat) => if pos < |input| && input[pos] == c then PSuccess(pos + 1, c) - else PFailure(Error, "Expected '"+[c]+"'", pos) + else PFailure(Fatal, "Expected '"+[c]+"'", pos) } opaque function Char?(c: char): (p: Parser) @@ -930,7 +1052,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { { (pos: nat) => if pos + |expected| <= |input| && input[pos..pos + |expected|] == expected then PSuccess(pos + |expected|, expected) - else PFailure(Error, "Expected '"+expected+"'", pos) + else PFailure(Fatal, "Expected '"+expected+"'", pos) } opaque function Const?(expected: string): (p: Parser) @@ -974,7 +1096,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { { var p := Bind(Rep(CharTest?(test)), (result: string, pos': nat) => - if result == "" then Fail_("Did not find an non-empty string satisfying test", Recoverable) + if result == "" then Fail("Did not find an non-empty string satisfying test", Recoverable) else Succeed(result)); assert forall pos: nat | pos <= |input| :: p.requires(pos) && (p(pos).PSuccess? ==> pos < p(pos).pos <= |input|) by { @@ -1106,7 +1228,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { } const EOS: Parser<()> := - (pos: nat) => if pos >= |input| then PSuccess(pos, ()) else PFailure(Error, "Expected end of string", pos) + (pos: nat) => if pos >= |input| then PSuccess(pos, ()) else PFailure(Fatal, "Expected end of string", pos) opaque function FirstOf(others: seq>): (p: ParserBuilder) requires |others| > 0 @@ -1140,10 +1262,10 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { && p(pos).pos == pos + 1) { (pos: nat) requires pos <= |input| => - if pos == |input| then PFailure(if test then Recoverable else Error, "Expected a digit", pos) else + if pos == |input| then PFailure(if test then Recoverable else Fatal, "Expected a digit", pos) else if input[pos] in digitToNat then PSuccess(pos + 1, digitToNat[input[pos]]) - else PFailure(if test then Recoverable else Error, "Expected a digit", pos) + else PFailure(if test then Recoverable else Fatal, "Expected a digit", pos) } ghost predicate RecSpec(fun: string, otherFuns: set, rec: string --> Parser, pos: nat) @@ -1294,24 +1416,24 @@ abstract module ParserTests refines Parsers { lemma AboutFail_(message: string, level: FailureLevel, input: seq) ensures - var p := Fail_(message, level)(input); + var p := Fail(message, level)(input); && p.PFailure? && p.message == message && p.level == level && p.remaining == input { - reveal Fail_(); + reveal Fail(); } lemma AboutFail_2(message: string, input: seq) ensures - var p := Fail_(message)(input); + var p := Fail(message)(input); && p.PFailure? && p.message == message && p.level == Recoverable && p.remaining == input { - reveal Fail_(); + reveal Fail(); } lemma AboutBind_( @@ -1320,7 +1442,7 @@ abstract module ParserTests refines Parsers { input: seq ) ensures - var p := Bind__(left, right)(input); + var p := BindSucceeds(left, right)(input); && var leftResult := left(input); && !leftResult.IsFailure() ==> var leftValues := left(input).Extract(); @@ -1330,18 +1452,18 @@ abstract module ParserTests refines Parsers { && p.remaining == rightResult.remaining && p.result == rightResult.result { - reveal Bind__(); + reveal BindSucceeds(); } lemma AboutMap_(underlying: Parser, mappingFunc: R -> U, input: seq) - ensures var p := Map_(underlying, mappingFunc); + ensures var p := Map(underlying, mappingFunc); && (underlying(input).PSuccess? <==> p(input).PSuccess?) && (p(input).PSuccess? ==> && p(input).remaining == underlying(input).remaining && p(input).result == mappingFunc(underlying(input).result)) { - reveal Map_(); - reveal Bind__(); + reveal Map(); + reveal BindSucceeds(); reveal Succeed_(); } @@ -1352,11 +1474,11 @@ abstract module ParserTests refines Parsers { } lemma AboutMap_Bind_(underlying: Parser, mappingFunc: R -> U, input: seq) - ensures Map_(underlying, mappingFunc)(input) - == Bind__(underlying, BindMapCallback(mappingFunc))(input) + ensures Map(underlying, mappingFunc)(input) + == BindSucceeds(underlying, BindMapCallback(mappingFunc))(input) { - reveal Map_(); - reveal Bind__(); + reveal Map(); + reveal BindSucceeds(); reveal Succeed_(); } @@ -1379,19 +1501,19 @@ abstract module ParserTests refines Parsers { function BindConcatCallback(right: Parser): (L, seq) -> Parser<(L, R)> { (l: L, remaining: seq) => - Map_(right, (r: R) => (l, r)) + Map(right, (r: R) => (l, r)) } lemma AboutConcat_Bind_( left: Parser, right: Parser, input: seq) - ensures Concat_(left, right)(input) == Bind__(left, BindConcatCallback(right))(input) + ensures Concat_(left, right)(input) == BindSucceeds(left, BindConcatCallback(right))(input) { reveal Concat_(); - reveal Bind__(); + reveal BindSucceeds(); reveal Succeed_(); - reveal Map_(); + reveal Map(); } lemma AboutConcatR_( @@ -1419,12 +1541,12 @@ abstract module ParserTests refines Parsers { left: Parser, right: Parser, input: seq) - ensures Map_(Concat_(left, right), second())(input) == ConcatR_(left, right)(input) + ensures Map(Concat_(left, right), second())(input) == ConcatR_(left, right)(input) { reveal Concat_(); reveal Succeed_(); reveal ConcatR_(); - reveal Map_(); + reveal Map(); } @@ -1446,12 +1568,12 @@ abstract module ParserTests refines Parsers { left: Parser, right: Parser, input: seq) - ensures Map_(Concat_(left, right), first())(input) == ConcatL_(left, right)(input) + ensures Map(Concat_(left, right), first())(input) == ConcatL_(left, right)(input) { reveal Concat_(); reveal Succeed_(); reveal ConcatL_(); - reveal Map_(); + reveal Map(); } lemma AboutRepeat_( From 77eb10036469f48e7a4d73112402565b2775c6ed Mon Sep 17 00:00:00 2001 From: Mikael Mayer Date: Mon, 13 Nov 2023 08:40:48 -0600 Subject: [PATCH 06/22] FailureData + imported two parsers --- src/Parsers/exampleArithmetic.dfy | 91 ++--------- src/Parsers/parser.dfy | 256 ++++++++++++++++-------------- 2 files changed, 156 insertions(+), 191 deletions(-) diff --git a/src/Parsers/exampleArithmetic.dfy b/src/Parsers/exampleArithmetic.dfy index 25034375..54bacf7f 100644 --- a/src/Parsers/exampleArithmetic.dfy +++ b/src/Parsers/exampleArithmetic.dfy @@ -1,73 +1,8 @@ include "parser.dfy" -module {:options "-functionSyntax:4", "-quantifierSyntax:4"} EngineParens { - import opened Parsers - import opened ParserBuilders - import opened ParserEngine - - class EngineParens extends ParserEngine.Engine { - constructor(input: string) { - this.input := input; - } - - const fixmapBase: map> := map[] - - function {:opaque} atom(functions: set): (mapper: ParserMapper) - requires "plus__" in functions - ensures RecSpecOnce("atom", functions, mapper) - { - (rec: ParserSelector, pos: nat) requires RecSpec("atom", functions, rec, pos) => - C?("(").o_I(B(rec("plus__")).I_o(C(")"))) - .O(N().M(n => Number(n))).apply(pos) - } - - function {:opaque} times(functions: set): (mapper: ParserMapper) - requires "atom" in functions - ensures RecSpecOnce("times", functions, mapper) - { - (rec: ParserSelector, pos: nat) requires RecSpec("times", functions, rec, pos) => - Bind(rec("atom"), (expr: Expression, pos': nat) => - RepAcc(Concat(Or(Const?("*"), Or(Const?("/"), Const?("%"))), rec("atom")), expr, Expression.InfixBuilder()))(pos) - } - - function {:opaque} plus(functions: set): (mapper: ParserMapper) - requires "times" in functions - ensures RecSpecOnce("plus__", functions, mapper) - { - (rec: ParserSelector, pos: nat) requires RecSpec("plus__", functions, rec, pos) => - Bind(rec("times"), (expr: Expression, pos': nat) => - RepAcc(Concat(Or(Const?("+"), Const?("-")), rec("times")), expr, Expression.InfixBuilder()))(pos) - } - - /* The DSL makes verification 7/2 slower (7M resource units vs 2M resource units above*/ - /*function {:opaque} plus(functions: set): (mapper: ParserMapper) - requires "times" in functions - ensures FixMapInnerOnce("plus__", mapper, functions, |input|) - { - (rec: ParserSelector, pos: nat) - requires RecSpec("plus__", functions, rec, pos) => - B(rec("times")).Then((expr: Expression, pos': nat) => - C?("+").o_I(B(rec("times"))) - .Repeat(expr, Expression.BinaryBuilder("+"))).apply(pos) - }*/ - - function {:opaque} ExpressionsFix(): (r: ParseResult) - { - var functions := {"atom", "times", "plus__"}; - var underlying := fixmapBase[ - "atom" := atom(functions)][ - "times" := times(functions)][ - "plus__" := plus(functions)]; - FixMap(underlying, "plus__")(0) - } - } - - function NatToString(n: nat): string { - if 0 <= n <= 9 then - ["0123456789"[n]] - else - NatToString(n/10) + NatToString(n%10) - } +module ArithmeticParser { + import opened StringParsers + import opened Printer datatype Expression = | Binary(op: string, left: Expression, right: Expression) @@ -85,7 +20,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} EngineParens { requires level <= 2 { match this - case Number(x) => NatToString(x) + case Number(x) => Printer.natToString(x) case Binary(op, left, right) => (match level case 0 => "(" case 1 => "[" case 2 => "{") + left.ToString((level + 1)%3) + op + right.ToString((level + 1) % 3) @@ -93,6 +28,9 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} EngineParens { } } + const parser: Parser + := Succeed(Number(1)) + function repeat(str: string, n: nat): (r: string) ensures |r| == |str| * n { @@ -100,18 +38,23 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} EngineParens { else str + repeat(str, n-1) } + method Main(args: seq) { if |args| <= 1 { return; } for i := 1 to |args| { var input := args[i]; - var engine := new EngineParens(input); - match engine.ExpressionsFix() { - case PSuccess(_, n) => print "result:", n.ToString(0); - case PFailure(level, error, pos) => print input, "\n"; + Succeed_NonCrashingAuto(); + assert Valid(parser); + reveal Valid(); + match parser(input) { + case PSuccess(result, _) => + print "result:", result.ToString(0), "\n"; + case PFailure(level, failureData) => print input, "\n"; + var pos: nat := |input| - |failureData.remaining|; // Need the parser to be Valid() print repeat(" ", pos), "^","\n"; - print error; + print failureData.message; } print "\n"; } diff --git a/src/Parsers/parser.dfy b/src/Parsers/parser.dfy index 2d25087e..59adc413 100644 --- a/src/Parsers/parser.dfy +++ b/src/Parsers/parser.dfy @@ -12,10 +12,19 @@ abstract module Parsers { datatype Either<+L, +R> = // Type to return when using the Or parser Left(l: L) | Right(r: R) - + + datatype FailureData = FailureData(message: string, remaining: seq, next: Option) + { + function Concat(other: FailureData): FailureData { + if next == Option.None then this.(next := Option.Some(other)) + else + FailureData(message, remaining, Option.Some(next.value.Concat(other))) + } + } datatype FailureLevel = - // PFailure level for parse results. A Fatal error will be propagated to the top + // PFailure level for parse results. A Fatal error results in a unique FailurePosition + // and will be propagated to the top, // while a Recoverable can be caught by a disjunctive pattern. // For example, if the parser Const?() fails, then it returns a Recoverable, // but the parser Const() will return an error. @@ -23,11 +32,14 @@ abstract module Parsers { datatype ParseResult<+R> = // ParseResult is a failure-compatible type - | PFailure(level: FailureLevel, message: string, remaining: seq) + | PFailure(level: FailureLevel, data: FailureData) // Returned if a parser failed | PSuccess(result: R, remaining: seq) // Returned if a parser succeeds, with the increment in the position { + function Remaining(): seq { + if PSuccess? then remaining else data.remaining + } predicate IsFailure() { PFailure? } @@ -45,7 +57,7 @@ abstract module Parsers { function PropagateFailure(): ParseResult requires IsFailure() { - PFailure(level, message, remaining) + PFailure(level, data) } function Extract(): (R, seq) @@ -58,12 +70,19 @@ abstract module Parsers { match this case PSuccess(result, remaining) => PSuccess(f(result), remaining) - case PFailure(level, message, remaining) => - PFailure(level, message, remaining) + case PFailure(level, data) => + PFailure(level, data) + } + + function MapRecoverableError(f: FailureData -> FailureData): ParseResult { + match this + case PFailure(Recoverable, data) => + PFailure(Recoverable, f(data)) + case _ => this } function IfRecoverableFailureNoProgress(input: seq, right: Parser): ParseResult { - if PFailure? && level == Recoverable && !Committed(input, remaining) then + if PFailure? && level == Recoverable && !Committed(input, Remaining()) then right(input) else this @@ -90,20 +109,20 @@ abstract module Parsers { && input[|input|-|remaining|..] == remaining } - opaque ghost predicate NonCrashing(underlying: Parser) + opaque ghost predicate Valid(underlying: Parser) // A parser is valid for an input if it never returns a fatal error // and always returns a suffix of its input { forall input: seq :: && (underlying(input).PFailure? ==> underlying(input).level == Recoverable) - && IsRemaining(input, underlying(input).remaining) + && IsRemaining(input, underlying(input).Remaining()) } // Parser combinators. // The following functions make it possible to create and compose parsers // All these combinators provide non-crashing parsers if their inputs are noncrashing - opaque function Succeed_(result: R): (p: Parser) + opaque function Succeed(result: R): (p: Parser) // A parser that does not consume any input and returns the given value // This is a generic function, it's better to use the Succeed function on strings. { @@ -111,21 +130,21 @@ abstract module Parsers { } lemma Succeed_NonCrashing(result: R) - ensures NonCrashing(Succeed_(result)) - { reveal NonCrashing(), Succeed_(); } + ensures Valid(Succeed(result)) + { reveal Valid(), Succeed(); } lemma Succeed_NonCrashingAuto() - ensures forall result: R :: NonCrashing(Succeed_(result)) - { reveal NonCrashing(), Succeed_(); } + ensures forall result: R :: Valid(Succeed(result)) + { reveal Valid(), Succeed(); } opaque function Epsilon(): (p: Parser<()>) { - Succeed_(()) + Succeed(()) } lemma Epsilon_NonCrashing() - ensures NonCrashing(Epsilon()) - { reveal NonCrashing(), Epsilon(); Succeed_NonCrashing(()); } + ensures Valid(Epsilon()) + { reveal Valid(), Epsilon(); Succeed_NonCrashing(()); } lemma AboutEpsilon_(input: seq) ensures @@ -134,22 +153,22 @@ abstract module Parsers { && p(input).remaining == input { reveal Epsilon(); - reveal Succeed_(); + reveal Succeed(); } opaque function Fail(message: string, level: FailureLevel := Recoverable): Parser // A parser that does not consume any input and returns the given failure { - (input: seq) => PFailure(level, message, input) + (input: seq) => PFailure(level, FailureData(message, input, Option.None)) } lemma Fail_NonCrashing(message: string) - ensures NonCrashing(Fail(message, Recoverable)) - { reveal Fail(); reveal NonCrashing(); } + ensures Valid(Fail(message, Recoverable)) + { reveal Fail(); reveal Valid(); } lemma Fail_NonCrashingAuto() - ensures forall message :: NonCrashing(Fail(message, Recoverable)) - { reveal Fail(); reveal NonCrashing(); } + ensures forall message :: Valid(Fail(message, Recoverable)) + { reveal Fail(); reveal Valid(); } opaque function Bind( left: Parser, @@ -194,42 +213,42 @@ abstract module Parsers { } ghost predicate BindRightNonCrashing(right: (L, seq) -> Parser) { - forall l: L, input: seq :: NonCrashing(right(l, input)) + forall l: L, input: seq :: Valid(right(l, input)) } lemma Bind_NonCrashing( left: Parser, right: (L, seq) -> Parser ) - requires NonCrashing(left) + requires Valid(left) requires BindRightNonCrashing(right) - ensures NonCrashing(BindSucceeds(left, right)) + ensures Valid(BindSucceeds(left, right)) { - reveal BindSucceeds(), NonCrashing(); + reveal BindSucceeds(), Valid(); var p := BindSucceeds(left, right); forall input: seq ensures && (p(input).PFailure? ==> p(input).level == Recoverable) - && IsRemaining(input, p(input).remaining) + && IsRemaining(input, p(input).Remaining()) { } } ghost predicate Bind_NonCrashingRight(left: Parser) - requires NonCrashing(left) + requires Valid(left) { forall right: (L, seq) -> Parser | BindRightNonCrashing(right) :: - NonCrashing(BindSucceeds(left, right)) + Valid(BindSucceeds(left, right)) } lemma Bind_NonCrashingAuto() - ensures forall left: Parser | NonCrashing(left) :: + ensures forall left: Parser | Valid(left) :: Bind_NonCrashingRight(left) { - forall left: Parser | NonCrashing(left), + forall left: Parser | Valid(left), right: (L, seq) -> Parser | BindRightNonCrashing(right) ensures - NonCrashing(BindSucceeds(left, right)) + Valid(BindSucceeds(left, right)) { Bind_NonCrashing(left, right); } @@ -256,7 +275,7 @@ abstract module Parsers { if l.IsFailure() then if l.IsFatal() then l.PropagateFailure() else PSuccess((), input) - else PFailure(Recoverable, "Not failed", input) + else PFailure(Recoverable, FailureData("Not failed", input, Option.None)) } opaque function And( @@ -286,6 +305,12 @@ abstract module Parsers { var p := Map(left, l => Left(l))(input); p.IfRecoverableFailureNoProgress(input, Map(right, r => Right(r))) + .MapRecoverableError(dataRight => + if p.IsFailure() && p.level.Recoverable? then + p.data.Concat(dataRight) + else + dataRight + ) } opaque function Lookahead(underlying: Parser): (p: Parser) @@ -296,8 +321,13 @@ abstract module Parsers { { (input: seq) => var p := underlying(input); - if p.IsFatalFailure() then p - else p.(remaining := input) + if p.IsFailure() then + if p.IsFatal() then + p + else + p.(data := FailureData(p.data.message, input, Option.None)) + else + p.(remaining := input) } opaque function If( @@ -335,7 +365,7 @@ abstract module Parsers { => var lResult := left(input); if lResult.PFailure? && lResult.level == Recoverable then - PFailure(Recoverable, lResult.message, input) + PFailure(Recoverable, FailureData(lResult.data.message, input, Option.None)) else var (l, remaining) :- left(input); var (r, remaining2) :- right(remaining); @@ -387,9 +417,9 @@ abstract module Parsers { case PSuccess(result, remaining) => if |remaining| >= |input| then PSuccess(acc + [result], input) else Repeat_(underlying, acc + [result], remaining) - case PFailure(Fatal, message, remaining) => - PFailure(Fatal, message, remaining) - case PFailure(Recoverable, message, remaining) => + case PFailure(Fatal, data) => + PFailure(Fatal, data) + case PFailure(Recoverable, data) => PSuccess(acc, input) } @@ -418,9 +448,9 @@ abstract module Parsers { if |remaining| < |input| then Fixpoint_(underlying, remaining) else if |remaining| == |input| then - PFailure(Recoverable, "No progress", remaining) + PFailure(Recoverable, FailureData("No progress", remaining, Option.None)) else - PFailure(Fatal, "Fixpoint called with an increasing remaining sequence", remaining); + PFailure(Fatal, FailureData("Fixpoint called with an increasing remaining sequence", remaining, Option.None)); underlying(callback)(input) } /*opaque function FixpointMap( @@ -445,7 +475,7 @@ abstract module Parsers { // f = pos => underlying[fun](f, pos) decreases |input|, if fun in underlying then underlying[fun].order else 0 { - if fun !in underlying then PFailure(Fatal, "Parser '"+fun+"' not found", input) else + if fun !in underlying then PFailure(Fatal, FailureData("Parser '"+fun+"' not found", input, Option.None)) else var RecursiveDef(orderFun, definitionFun) := underlying[fun]; var callback: ParserSelector := @@ -459,10 +489,10 @@ abstract module Parsers { if |remaining| < |input| || (|remaining| == |input| && orderFun' < orderFun) then FixpointMap_(underlying, fun', remaining) else if |remaining| == |input| then - PFailure(Recoverable, "Non progressing recursive call requires that order of '" - +fun'+"' ("+Printer.natToString(orderFun')+") is lower than the order of '"+fun+"' ("+Printer.natToString(orderFun)+")", remaining) + PFailure(Recoverable, FailureData("Non progressing recursive call requires that order of '" + +fun'+"' ("+Printer.natToString(orderFun')+") is lower than the order of '"+fun+"' ("+Printer.natToString(orderFun)+")", remaining, Option.None)) else - PFailure(Fatal, "Parser did not return a suffix of the input", remaining) + PFailure(Fatal, FailureData("Parser did not return a suffix of the input", remaining, Option.None)) ); definitionFun(callback)(input) } @@ -620,7 +650,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserTheorems { ensures forall pos: nat | MapSpec(size, underlying, mappingFunc, pos) :: p.requires(pos) { - var p := BindSucceeds(size, underlying, (result: R, pos': nat) requires mappingFunc.requires(result) => Succeed_(size, mappingFunc(result))); + var p := BindSucceeds(size, underlying, (result: R, pos': nat) requires mappingFunc.requires(result) => Succeed(size, mappingFunc(result))); assert forall pos: nat | MapSpec(size, underlying, mappingFunc, pos) :: p.requires(pos) by { forall pos: nat | MapSpec(size, underlying, mappingFunc, pos) @@ -628,7 +658,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserTheorems { { AboutMap_(size, underlying, mappingFunc, pos); var left := underlying; - var right := (result: R, pos': nat) requires mappingFunc.requires(result) => Succeed_(size, mappingFunc(result)); + var right := (result: R, pos': nat) requires mappingFunc.requires(result) => Succeed(size, mappingFunc(result)); assert BindSpec(size, left, right, pos); } } @@ -643,7 +673,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserTheorems { reveal Map(); reveal Map__(); reveal BindSucceeds(); - reveal Succeed_(); + reveal Succeed(); } opaque function Concat__(ghost size: nat, left: Parser, right: Parser) @@ -653,20 +683,20 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserTheorems { :: p.requires(pos) { BindSucceeds(size, left, (result: R, pos': nat) requires right.requires(pos') => - BindSucceeds(size, right, (u: U, pos'': nat) => Succeed_(size, (result, u)))) + BindSucceeds(size, right, (u: U, pos'': nat) => Succeed(size, (result, u)))) } lemma Concat_Concat2(size: nat, left: Parser, right: Parser, pos: nat) requires ConcatSpec_(size, left, right, pos) ensures BindSpec(size, left, (result: R, pos': nat) requires right.requires(pos') => - BindSucceeds(size, right, (u: U, pos'': nat) => Succeed_(size, (result, u))), pos) + BindSucceeds(size, right, (u: U, pos'': nat) => Succeed(size, (result, u))), pos) // TODO: Bug to report. Concat_() should not be needed ensures Concat_(size, left, right)(pos) == Concat__(size, left, right)(pos) { reveal BindSucceeds(); reveal Concat_(); reveal Concat__(); - reveal Succeed_(); + reveal Succeed(); } } @@ -782,21 +812,21 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserBuilders { } function R_(ghost size: nat, result: R): (p: ParserBuilder) { - B_(size, Succeed_(size, result)) + B_(size, Succeed(size, result)) } datatype FixMapParserBuilder = FixMapParserBuilder(ghost size: nat, ghost functions: set, underlying: map> := map[]) { - static function Empty(ghost size: nat, ghost functions: set): (b: FixMapParserBuilder) ensures b.NonCrashing() { + static function Empty(ghost size: nat, ghost functions: set): (b: FixMapParserBuilder) ensures b.Valid() { FixMapParserBuilder(size, functions, map[]) } - ghost predicate NonCrashing() { + ghost predicate Valid() { forall fun <- underlying :: FixpointMapSpecOnce(fun, underlying[fun], functions, size) } opaque function Add(name: string, mapper: ParserMapper): (f: FixMapParserBuilder) - requires NonCrashing() + requires Valid() requires name !in underlying requires FixpointMapSpecOnce(name, mapper, functions, size) - ensures f.NonCrashing() + ensures f.Valid() ensures f.functions == functions ensures f.size == size ensures name in f.underlying @@ -824,9 +854,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { // ConcatL(l, r) if l and r succeed consecutively, returns the value of l // ConcatR(l, r) if l and r succeed consecutively, returns the value of r // Or(l, r) Returns the first of l or r which succeeds - // EitherP(l, r) Returns the first of l or r which succeeds, wrapped in Either type - // Char('c') fails with Fatal if 'c' is not at the given position. - // Char?('c') fails with Recoverable if 'c' is not at the given position. + // EitherP(l, r) Returns the first of l or r which succeeds, wrapped in Either type0 // Const("string") fails with Fatal if "string" is not at the given position. // Const?("string") fails with Recoverable if "string" is not at the given position. // Rep(parser) repeats the parser as much as possible and returns the sequence of results @@ -975,26 +1003,6 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { EitherP_(|input|, left, right) } - opaque function Char(c: char): (p: Parser) - ensures forall pos: nat :: p.requires(pos) - ensures forall pos: nat :: p(pos).PSuccess? ==> - pos < |input| && p(pos).pos == pos + 1 - { - (pos: nat) => - if pos < |input| && input[pos] == c then PSuccess(pos + 1, c) - else PFailure(Fatal, "Expected '"+[c]+"'", pos) - } - - opaque function Char?(c: char): (p: Parser) - ensures forall pos: nat :: p.requires(pos) - ensures forall pos: nat :: p(pos).PSuccess? ==> - pos < |input| && p(pos).pos == pos + 1 - { - (pos: nat) => - if pos < |input| && input[pos] == c then PSuccess(pos + 1, c) - else PFailure(Recoverable, "Expected a different char but that's ok", pos) - } - // Returns a function that tests if, at the given position, we can find the string toTest opaque function TestString(toTest: string): (test: nat --> bool) ensures forall pos: nat | pos <= |input| :: test.requires(pos) @@ -1047,22 +1055,6 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { B_(|input|, underlying) } - opaque function Const(expected: string): (p: Parser) - ensures ConstSpec(expected, p) - { - (pos: nat) => - if pos + |expected| <= |input| && input[pos..pos + |expected|] == expected then PSuccess(pos + |expected|, expected) - else PFailure(Fatal, "Expected '"+expected+"'", pos) - } - - opaque function Const?(expected: string): (p: Parser) - ensures ConstSpec(expected, p) - { - (pos: nat) => - if pos + |expected| <= |input| && input[pos..pos + |expected|] == expected then PSuccess(pos + |expected|, expected) - else PFailure(Recoverable, "Possibly expecting something else but that's ok", pos) - } - opaque function Maybe(underlying: Parser): (p: Parser>) requires IsRegular(underlying) ensures IsRegular(p) @@ -1218,7 +1210,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { ) } opaque function FixMapBuilder(ghost functions: set): (r: FixMapParserBuilder) - ensures r.NonCrashing() + ensures r.Valid() ensures |r.underlying.Keys| == 0 ensures r.functions == functions ensures r.size == |input| @@ -1407,20 +1399,19 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { } abstract module ParserTests refines Parsers { - lemma AboutSucceed_(result: R, input: seq) + lemma AboutSucceed(result: R, input: seq) ensures - var p := Succeed_(result); + var p := Succeed(result); && p(input).PSuccess? && p(input).remaining == input - { reveal Succeed_(); } + { reveal Succeed(); } lemma AboutFail_(message: string, level: FailureLevel, input: seq) ensures var p := Fail(message, level)(input); && p.PFailure? - && p.message == message + && p.data == FailureData(message, input, Option.None) && p.level == level - && p.remaining == input { reveal Fail(); } @@ -1429,9 +1420,8 @@ abstract module ParserTests refines Parsers { ensures var p := Fail(message)(input); && p.PFailure? - && p.message == message && p.level == Recoverable - && p.remaining == input + && p.data == FailureData(message, input, Option.None) { reveal Fail(); } @@ -1464,13 +1454,13 @@ abstract module ParserTests refines Parsers { { reveal Map(); reveal BindSucceeds(); - reveal Succeed_(); + reveal Succeed(); } function BindMapCallback(mappingFunc: R -> U): (R, seq) -> Parser { - (result: R, remaining: seq) => Succeed_(mappingFunc(result)) + (result: R, remaining: seq) => Succeed(mappingFunc(result)) } lemma AboutMap_Bind_(underlying: Parser, mappingFunc: R -> U, input: seq) @@ -1479,7 +1469,7 @@ abstract module ParserTests refines Parsers { { reveal Map(); reveal BindSucceeds(); - reveal Succeed_(); + reveal Succeed(); } lemma AboutConcat_( @@ -1512,7 +1502,7 @@ abstract module ParserTests refines Parsers { { reveal Concat_(); reveal BindSucceeds(); - reveal Succeed_(); + reveal Succeed(); reveal Map(); } @@ -1544,7 +1534,7 @@ abstract module ParserTests refines Parsers { ensures Map(Concat_(left, right), second())(input) == ConcatR_(left, right)(input) { reveal Concat_(); - reveal Succeed_(); + reveal Succeed(); reveal ConcatR_(); reveal Map(); } @@ -1571,7 +1561,7 @@ abstract module ParserTests refines Parsers { ensures Map(Concat_(left, right), first())(input) == ConcatL_(left, right)(input) { reveal Concat_(); - reveal Succeed_(); + reveal Succeed(); reveal ConcatL_(); reveal Map(); } @@ -1585,10 +1575,10 @@ abstract module ParserTests refines Parsers { // returns a remaining that is a suffix of the input, // then Repeat with always return a success decreases |input| - requires NonCrashing(underlying) + requires Valid(underlying) ensures Repeat_(underlying, acc, input).PSuccess? { - reveal Repeat_(), NonCrashing(); + reveal Repeat_(), Valid(); assert IsRemaining(input, input[0..]); } @@ -1606,18 +1596,18 @@ abstract module ParserTests refines Parsers { (|acc| < |result.result| && |result.remaining| < |input|)) } - lemma AboutRepeatIncreasesPosIfUnderlyingSucceedsAtLeastOnce( + lemma {:vcs_split_on_every_assert} AboutRepeatIncreasesPosIfUnderlyingSucceedsAtLeastOnce( underlying: Parser, acc: seq, input: seq ) decreases |input| - requires NonCrashing(underlying) + requires Valid(underlying) ensures AboutRepeatIncreasesPosIfUnderlyingSucceedsAtLeastOnceEnsures (underlying, acc, input) { - reveal Repeat_(), NonCrashing(); + reveal Repeat_(), Valid(); var _ := input[0..]; match underlying(input) case PSuccess(result, remaining) => @@ -1635,13 +1625,13 @@ abstract module ParserTests refines Parsers { p.PSuccess? ==> IsRemaining(input, p.remaining) } - lemma AboutFixpoint_( + lemma {:vcs_split_on_every_assert} AboutFixpoint_( underlying: Parser -> Parser, input: seq) requires forall callback: Parser, u: seq | underlying(callback)(u).PSuccess? - :: IsRemaining(input, underlying(callback)(input).remaining) + :: IsRemaining(input, underlying(callback)(input).Remaining()) ensures AboutFixpoint_Ensures(underlying, input) { reveal Fixpoint_(); @@ -1657,4 +1647,36 @@ abstract module ParserTests refines Parsers { && (p.PSuccess? ==> IsRemaining(input, p.remaining)) } +} + +module StringParsers refines ParserTests { + type C = char + + opaque function Char(c: char): (p: Parser) + { + (input: string) => + if 0 < |input| && input[0] == c then PSuccess(c, input[1..]) + else PFailure(Fatal, FailureData("Expected '"+[c]+"'", input, Option.None)) + } + + opaque function Char?(c: char): (p: Parser) + { + (input: string) => + if 0 < |input| && input[0] == c then PSuccess(c, input[1..]) + else PFailure(Recoverable, FailureData("Expected '"+[c]+"'", input, Option.None)) + } + + opaque function Const(expected: string): (p: Parser) + { + (input: string) => + if |expected| <= |input| && input[0..|expected|] == expected then PSuccess(expected, input[|expected|..]) + else PFailure(Fatal, FailureData("Expected '"+expected+"'", input, Option.None)) + } + + opaque function Const?(expected: string): (p: Parser) + { + (input: string) => + if |expected| <= |input| && input[0..|expected|] == expected then PSuccess(expected, input[|expected|..]) + else PFailure(Recoverable, FailureData("Expected '"+expected+"'", input, Option.None)) + } } \ No newline at end of file From 50d51ea603bd8ce11377d5d4478ee9ec90688fba Mon Sep 17 00:00:00 2001 From: Mikael Mayer Date: Mon, 13 Nov 2023 09:04:33 -0600 Subject: [PATCH 07/22] Refactored print failure in string parsers --- src/Parsers/exampleArithmetic.dfy | 20 +++------ src/Parsers/library.dfy | 6 ++- src/Parsers/parser.dfy | 70 ++++++++++++++++++++++++++++--- 3 files changed, 75 insertions(+), 21 deletions(-) diff --git a/src/Parsers/exampleArithmetic.dfy b/src/Parsers/exampleArithmetic.dfy index 54bacf7f..b04e1c27 100644 --- a/src/Parsers/exampleArithmetic.dfy +++ b/src/Parsers/exampleArithmetic.dfy @@ -6,7 +6,7 @@ module ArithmeticParser { datatype Expression = | Binary(op: string, left: Expression, right: Expression) - | Number(x: nat) + | Number(x: int) { static function BinaryBuilder(op: string): (Expression, Expression) -> Expression { @@ -20,7 +20,7 @@ module ArithmeticParser { requires level <= 2 { match this - case Number(x) => Printer.natToString(x) + case Number(x) => (if x < 0 then "-" else "") + Printer.natToString(if x < 0 then -x else x) case Binary(op, left, right) => (match level case 0 => "(" case 1 => "[" case 2 => "{") + left.ToString((level + 1)%3) + op + right.ToString((level + 1) % 3) @@ -29,15 +29,7 @@ module ArithmeticParser { } const parser: Parser - := Succeed(Number(1)) - - function repeat(str: string, n: nat): (r: string) - ensures |r| == |str| * n - { - if n == 0 then "" - else str + repeat(str, n-1) - } - + := Map(Int(), (i: int) => Number(i)) method Main(args: seq) { if |args| <= 1 { @@ -51,10 +43,8 @@ module ArithmeticParser { match parser(input) { case PSuccess(result, _) => print "result:", result.ToString(0), "\n"; - case PFailure(level, failureData) => print input, "\n"; - var pos: nat := |input| - |failureData.remaining|; // Need the parser to be Valid() - print repeat(" ", pos), "^","\n"; - print failureData.message; + case failure => + PrintFailure(input, failure); } print "\n"; } diff --git a/src/Parsers/library.dfy b/src/Parsers/library.dfy index ca73b77d..4befa241 100644 --- a/src/Parsers/library.dfy +++ b/src/Parsers/library.dfy @@ -1,8 +1,12 @@ module {:options "/functionSyntax:4"} Printer { - type stringNat = s: string | + predicate IsStringNat(s: string) { |s| > 0 && (|s| > 1 ==> s[0] != '0') && forall i | 0 <= i < |s| :: s[i] in "0123456789" + } + + type stringNat = s: string | + IsStringNat(s) witness "1" function natToString(n: nat): stringNat { diff --git a/src/Parsers/parser.dfy b/src/Parsers/parser.dfy index 59adc413..2943520a 100644 --- a/src/Parsers/parser.dfy +++ b/src/Parsers/parser.dfy @@ -342,6 +342,16 @@ abstract module Parsers { succeed(input) } + opaque function Maybe(underlying: Parser): Parser> + { + (input: seq) => + var u := underlying(input); + if u.IsFatalFailure() then u.PropagateFailure() + else + if u.PSuccess? then u.MapResult(result => Option.Some(result)) + else PSuccess(Option.None, input) + } + opaque function Concat_( left: Parser, right: Parser @@ -855,8 +865,6 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { // ConcatR(l, r) if l and r succeed consecutively, returns the value of r // Or(l, r) Returns the first of l or r which succeeds // EitherP(l, r) Returns the first of l or r which succeeds, wrapped in Either type0 - // Const("string") fails with Fatal if "string" is not at the given position. - // Const?("string") fails with Recoverable if "string" is not at the given position. // Rep(parser) repeats the parser as much as possible and returns the sequence of results // Fix((result, pos) => parseResult) returns a parser that recursively applies the provided function when needed // FixMap((result, pos) => parseResult) Same as fix but can provide a mapping from string to functions instead of a single function @@ -1659,11 +1667,40 @@ module StringParsers refines ParserTests { else PFailure(Fatal, FailureData("Expected '"+[c]+"'", input, Option.None)) } - opaque function Char?(c: char): (p: Parser) + opaque function Char?(expectedChar: char): (p: Parser) + { + CharTest?((c: char) => c == expectedChar, [expectedChar]) + } + + opaque function CharTest?(test: char -> bool, name: string): (p: Parser) { (input: string) => - if 0 < |input| && input[0] == c then PSuccess(c, input[1..]) - else PFailure(Recoverable, FailureData("Expected '"+[c]+"'", input, Option.None)) + if 0 < |input| && test(input[0]) then PSuccess(input[0], input[1..]) + else PFailure(Recoverable, + FailureData("Expected a "+name+" but got " + + (if 0 < |input| then "'"+[input[0]]+"'" else "end of string") + , input, Option.None)) + } + + opaque function Digit(): (p: Parser) + { + CharTest?(c => c in "0123456789", "digit") + } + + opaque function Nat(): (p: Parser) + { + Map(Repeat(Digit()), + result => + if Printer.IsStringNat(result) then // Should always be true + Printer.stringToNat(result) + else 0) + } + + opaque function Int(): (p: Parser) + { + Bind(Maybe(Char?('-')), + (minusSign: Option) => + Map(Nat(), (result: nat) => if minusSign.Some? then -result else result)) } opaque function Const(expected: string): (p: Parser) @@ -1679,4 +1716,27 @@ module StringParsers refines ParserTests { if |expected| <= |input| && input[0..|expected|] == expected then PSuccess(expected, input[|expected|..]) else PFailure(Recoverable, FailureData("Expected '"+expected+"'", input, Option.None)) } + + + function repeat(str: string, n: nat): (r: string) + ensures |r| == |str| * n + { + if n == 0 then "" + else str + repeat(str, n-1) + } + + // TODO: Mention the error level, the line number, the column number + // TODO: Extract only the line of interest + method PrintFailure(input: string, result: ParseResult) + requires result.PFailure? + { + var pos: int := |input| - |result.data.remaining|; // Need the parser to be Valid() + if pos < 0 { // Could be proved false if parser is Valid() + pos := 0; + } + print input, "\n"; + print repeat(" ", pos), "^","\n"; + print result.data.message; + } + } \ No newline at end of file From e84b4582911da8e5b0eee3136cd4a4e521d62a93 Mon Sep 17 00:00:00 2001 From: Mikael Mayer Date: Mon, 13 Nov 2023 15:06:53 -0600 Subject: [PATCH 08/22] Arithmetic parser easy to define and render! --- src/Parsers/exampleArithmetic.dfy | 37 ++++++++- src/Parsers/parser.dfy | 127 +++++++++++++++--------------- 2 files changed, 100 insertions(+), 64 deletions(-) diff --git a/src/Parsers/exampleArithmetic.dfy b/src/Parsers/exampleArithmetic.dfy index b04e1c27..7982be9b 100644 --- a/src/Parsers/exampleArithmetic.dfy +++ b/src/Parsers/exampleArithmetic.dfy @@ -28,8 +28,41 @@ module ArithmeticParser { } } + const buildParsedExpr: ((Expression, Wrappers.Option<(string, Expression)>)) -> Expression + := ((result: (Expression, Wrappers.Option<(string, Expression)>)) => + if result.1.None? then result.0 else + Binary(result.1.value.0, result.0, result.1.value.1)) + const parser: Parser - := Map(Int(), (i: int) => Number(i)) + := FixpointMap( + map[ + "atom" := + RecursiveDef(0, (callback: ParserSelector) => + Or(ConcatR( + Const?("("), + ConcatL(callback("term"), Const(")"))), + Map(Int(), (result: int) => Number(result)) + )), + "factor" := + RecursiveDef(1, (callback: ParserSelector) => + Map( + Concat( + callback("atom"), + Maybe(Concat(Or(Const?("*"), Const?("/")), + callback("factor")))), + buildParsedExpr + )), + "term" := + RecursiveDef(2, (callback: ParserSelector) => + Map( + Concat(callback("factor"), + Maybe(Concat(Or(Const?("+"), Const?("-")), + callback("term")))), + buildParsedExpr + )) + ], + "term" + ); method Main(args: seq) { if |args| <= 1 { @@ -38,7 +71,7 @@ module ArithmeticParser { for i := 1 to |args| { var input := args[i]; Succeed_NonCrashingAuto(); - assert Valid(parser); + assume {:axiom} Valid(parser); // TODO: Prove reveal Valid(); match parser(input) { case PSuccess(result, _) => diff --git a/src/Parsers/parser.dfy b/src/Parsers/parser.dfy index 2943520a..7c8a9053 100644 --- a/src/Parsers/parser.dfy +++ b/src/Parsers/parser.dfy @@ -94,7 +94,7 @@ abstract module Parsers { // Because it returns a delta pos, it cannot return a position negative from the origing // If the parsing is out of context, it will return a failure. - type ParserSelector = string -> Option> + type ParserSelector = string -> Parser // A parser selector is a function that, given a name that exists, // returns a parser associated to this name @@ -291,7 +291,7 @@ abstract module Parsers { PSuccess((l, r), remainingRight) } - opaque function Or( + opaque function DisjunctiveOr( left: Parser, right: Parser ) : (p: Parser>) @@ -313,6 +313,27 @@ abstract module Parsers { ) } + opaque function Or( + left: Parser, + right: Parser + ) : (p: Parser) + // left parses the string. If left succeeds, returns + // if left fails, two cases + // - If the error is recoverable and the parser did not consume input, + // then return what right returns + // - Otherwise return the first error + { + (input: seq) => + var p := left(input); + p.IfRecoverableFailureNoProgress(input, right) + .MapRecoverableError(dataRight => + if p.IsFailure() && p.level.Recoverable? then + p.data.Concat(dataRight) + else + dataRight + ) + } + opaque function Lookahead(underlying: Parser): (p: Parser) // If the underlying parser succeeds, returns its result without committing the input // if the underlying parser fails, @@ -352,7 +373,7 @@ abstract module Parsers { else PSuccess(Option.None, input) } - opaque function Concat_( + opaque function Concat( left: Parser, right: Parser ) : (p: Parser<(L, R)>) @@ -365,24 +386,7 @@ abstract module Parsers { PSuccess((l, r), remaining2) } - opaque function ConcatLeftNonCommitting( - left: Parser, - right: Parser - ) : (p: Parser<(L, R)>) - // Makes it possible to concatenate two consecutive parsers and return the pair of the results - { - (input: seq) - => - var lResult := left(input); - if lResult.PFailure? && lResult.level == Recoverable then - PFailure(Recoverable, FailureData(lResult.data.message, input, Option.None)) - else - var (l, remaining) :- left(input); - var (r, remaining2) :- right(remaining); - PSuccess((l, r), remaining2) - } - - opaque function ConcatR_( + opaque function ConcatR( left: Parser, right: Parser ) : (p: Parser) @@ -395,7 +399,7 @@ abstract module Parsers { PSuccess(r, remaining2) } - opaque function ConcatL_( + opaque function ConcatL( left: Parser, right: Parser ) : (p: Parser) @@ -463,16 +467,16 @@ abstract module Parsers { PFailure(Fatal, FailureData("Fixpoint called with an increasing remaining sequence", remaining, Option.None)); underlying(callback)(input) } - /*opaque function FixpointMap( - maxPos: nat, - underlying: map, nat) -> ParseResult>, - fun: string): Parser + opaque function FixpointMap( + underlying: map>, + fun: string): (p: Parser) { - (pos: nat) => FixpointMap_(maxPos, underlying, fun, pos) - }*/ + (input: seq) => FixpointMap_(underlying, fun, input) + } + datatype RecursiveDef = RecursiveDef( order: nat, - definition: (ParserSelector) -> Parser + definition: ParserSelector -> Parser ) opaque function FixpointMap_( underlying: map>, @@ -491,10 +495,9 @@ abstract module Parsers { := (fun': string) => if fun' !in underlying.Keys then - Option.None + Fail(fun' + " not defined", Fatal) else var RecursiveDef(orderFun', definitionFun') := underlying[fun']; - Option.Some( (remaining: seq) => if |remaining| < |input| || (|remaining| == |input| && orderFun' < orderFun) then FixpointMap_(underlying, fun', remaining) @@ -503,7 +506,7 @@ abstract module Parsers { +fun'+"' ("+Printer.natToString(orderFun')+") is lower than the order of '"+fun+"' ("+Printer.natToString(orderFun)+")", remaining, Option.None)) else PFailure(Fatal, FailureData("Parser did not return a suffix of the input", remaining, Option.None)) - ); + ; definitionFun(callback)(input) } @@ -686,7 +689,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserTheorems { reveal Succeed(); } - opaque function Concat__(ghost size: nat, left: Parser, right: Parser) + opaque function Concat_(ghost size: nat, left: Parser, right: Parser) : (p: Parser<(R, U)>) // Concat is equivalent to two binds methods ensures forall pos: nat | ConcatSpec_(size, left, right, pos) @@ -696,16 +699,16 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserTheorems { BindSucceeds(size, right, (u: U, pos'': nat) => Succeed(size, (result, u)))) } - lemma Concat_Concat2(size: nat, left: Parser, right: Parser, pos: nat) + lemma ConcatConcat2(size: nat, left: Parser, right: Parser, pos: nat) requires ConcatSpec_(size, left, right, pos) ensures BindSpec(size, left, (result: R, pos': nat) requires right.requires(pos') => BindSucceeds(size, right, (u: U, pos'': nat) => Succeed(size, (result, u))), pos) - // TODO: Bug to report. Concat_() should not be needed - ensures Concat_(size, left, right)(pos) == Concat__(size, left, right)(pos) + // TODO: Bug to report. Concat() should not be needed + ensures Concat(size, left, right)(pos) == Concat_(size, left, right)(pos) { reveal BindSucceeds(); + reveal Concat(); reveal Concat_(); - reveal Concat__(); reveal Succeed(); } } @@ -741,7 +744,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserBuilders { :: p.apply.requires(pos) && (p.apply(pos).PSuccess? ==> pos <= p.apply(pos).pos <= size) { - B_(size, ConcatR_(size, apply, other.apply)) + B_(size, ConcatR(size, apply, other.apply)) } opaque function I_o(other: ParserBuilder): (p: ParserBuilder) requires size == other.size @@ -751,7 +754,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserBuilders { :: p.apply.requires(pos) && (p.apply(pos).PSuccess? ==> pos <= p.apply(pos).pos <= size) { - B_(size, ConcatL_(size, apply, other.apply)) + B_(size, ConcatL(size, apply, other.apply)) } opaque function M(mappingFunc: R --> U): (p: ParserBuilder) ensures p.size == size @@ -956,7 +959,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { :: p.requires(pos) && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) { - Concat_(|input|, left, right) + Concat(|input|, left, right) } opaque function ConcatR( @@ -969,7 +972,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { :: p.requires(pos) && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) { - ConcatR_(|input|, left, right) + ConcatR(|input|, left, right) } opaque function ConcatL( @@ -982,7 +985,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { :: p.requires(pos) && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) { - ConcatL_(|input|, left, right) + ConcatL(|input|, left, right) } opaque function Or( @@ -1480,11 +1483,11 @@ abstract module ParserTests refines Parsers { reveal Succeed(); } - lemma AboutConcat_( + lemma AboutConcat( left: Parser, right: Parser, input: seq) - ensures var p := Concat_(left, right); + ensures var p := Concat(left, right); && (p(input).PSuccess? ==> && left(input).PSuccess? && p(input).result.0 == left(input).result @@ -1493,7 +1496,7 @@ abstract module ParserTests refines Parsers { && p(input).result.1 == right(input2).result && p(input).remaining == right(input2).remaining) { - reveal Concat_(); + reveal Concat(); } function BindConcatCallback(right: Parser): (L, seq) -> Parser<(L, R)> @@ -1502,23 +1505,23 @@ abstract module ParserTests refines Parsers { Map(right, (r: R) => (l, r)) } - lemma AboutConcat_Bind_( + lemma AboutConcatBind_( left: Parser, right: Parser, input: seq) - ensures Concat_(left, right)(input) == BindSucceeds(left, BindConcatCallback(right))(input) + ensures Concat(left, right)(input) == BindSucceeds(left, BindConcatCallback(right))(input) { - reveal Concat_(); + reveal Concat(); reveal BindSucceeds(); reveal Succeed(); reveal Map(); } - lemma AboutConcatR_( + lemma AboutConcatR( left: Parser, right: Parser, input: seq) - ensures var p := ConcatR_(left, right); + ensures var p := ConcatR(left, right); && (p(input).PSuccess? ==> && left(input).PSuccess? && var input2 := left(input).remaining; @@ -1526,7 +1529,7 @@ abstract module ParserTests refines Parsers { && p(input).result == right(input2).result && p(input).remaining == right(input2).remaining) { - reveal ConcatR_(); + reveal ConcatR(); } function first(): ((L, R)) -> L { @@ -1535,24 +1538,24 @@ abstract module ParserTests refines Parsers { function second(): ((L, R)) -> R { (lr: (L, R)) => lr.1 } - lemma AboutConcat_ConcatR_( + lemma AboutConcatConcatR( left: Parser, right: Parser, input: seq) - ensures Map(Concat_(left, right), second())(input) == ConcatR_(left, right)(input) + ensures Map(Concat(left, right), second())(input) == ConcatR(left, right)(input) { - reveal Concat_(); + reveal Concat(); reveal Succeed(); - reveal ConcatR_(); + reveal ConcatR(); reveal Map(); } - lemma AboutConcatL_( + lemma AboutConcatL( left: Parser, right: Parser, input: seq) - ensures var p := ConcatL_(left, right); + ensures var p := ConcatL(left, right); && (p(input).PSuccess? ==> && left(input).PSuccess? && var input2 := left(input).remaining; @@ -1560,17 +1563,17 @@ abstract module ParserTests refines Parsers { && p(input).result == left(input).result && p(input).remaining == right(input2).remaining) { - reveal ConcatL_(); + reveal ConcatL(); } - lemma AboutConcat_ConcatL_( + lemma AboutConcatConcatL( left: Parser, right: Parser, input: seq) - ensures Map(Concat_(left, right), first())(input) == ConcatL_(left, right)(input) + ensures Map(Concat(left, right), first())(input) == ConcatL(left, right)(input) { - reveal Concat_(); + reveal Concat(); reveal Succeed(); - reveal ConcatL_(); + reveal ConcatL(); reveal Map(); } From b3ec0071bb83d4942fa50792ecf7d960c6e2d77d Mon Sep 17 00:00:00 2001 From: Mikael Mayer Date: Tue, 14 Nov 2023 11:09:53 -0600 Subject: [PATCH 09/22] Better error reporting --- src/Parsers/exampleArithmetic.dfy | 105 +++++++++---- src/Parsers/library.dfy | 2 +- src/Parsers/parser.dfy | 247 ++++++++++++++++++++---------- 3 files changed, 240 insertions(+), 114 deletions(-) diff --git a/src/Parsers/exampleArithmetic.dfy b/src/Parsers/exampleArithmetic.dfy index 7982be9b..2b8808a9 100644 --- a/src/Parsers/exampleArithmetic.dfy +++ b/src/Parsers/exampleArithmetic.dfy @@ -2,12 +2,41 @@ include "parser.dfy" module ArithmeticParser { import opened StringParsers - import opened Printer + import opened StringNat + + type Result = StringParsers.Wrappers.Result datatype Expression = | Binary(op: string, left: Expression, right: Expression) - | Number(x: int) + | Number(value: int) + | Unknown(power: int) { + function Distribute(): Result { + match this { + case Number(x: int) => Result.Success(this) + case Binary(op, left, right) => + var l :- left.Distribute(); + var r :- right.Distribute(); + if l.Number? && r.Number? then + match op { + case "+" => Result.Success(Number(l.value + r.value)) + case "-" => Result.Success(Number(l.value - r.value)) + case "*" => Result.Success(Number(l.value * r.value)) + case "/" => + if r.value == 0 then Result.Failure("Division by zero (" + right.ToString(0) + " evaluates to zero)") else + Result.Success(Number(l.value / r.value)) + case "%" => + if r.value == 0 then Result.Failure("Modulo by zero (" + right.ToString(0) + " evaluates to zero)") else + Result.Success(Number(l.value % r.value)) + case _ => Result.Failure("Unsupported operator: " + op) + } + else + Result.Success(this) + case Unknown(0) => Result.Success(Number(1)) + case Unknown(_) => + Result.Success(this) + } + } static function BinaryBuilder(op: string): (Expression, Expression) -> Expression { (left: Expression, right: Expression) => Binary(op, left, right) @@ -20,11 +49,15 @@ module ArithmeticParser { requires level <= 2 { match this - case Number(x) => (if x < 0 then "-" else "") + Printer.natToString(if x < 0 then -x else x) + case Number(x) => (if x < 0 then "-" else "") + StringNat.natToString(if x < 0 then -x else x) case Binary(op, left, right) => (match level case 0 => "(" case 1 => "[" case 2 => "{") - + left.ToString((level + 1)%3) + op + right.ToString((level + 1) % 3) + + left.ToString(level) + op + right.ToString(level) + (match level case 0 => ")" case 1 => "]" case 2 => "}") + case Unknown(power) => + if power == 1 then "x" else if power == 0 then "1" else + if power < 0 then "x^(-" + StringNat.natToString(0-power)+")" else + "x^" + StringNat.natToString(power) } } @@ -34,35 +67,44 @@ module ArithmeticParser { Binary(result.1.value.0, result.0, result.1.value.1)) const parser: Parser - := FixpointMap( + := ConcatL( + FixpointMap( map[ "atom" := RecursiveDef(0, (callback: ParserSelector) => Or(ConcatR( - Const?("("), - ConcatL(callback("term"), Const(")"))), - Map(Int(), (result: int) => Number(result)) - )), + String("("), ConcatL( + callback("term"), + String(")"))), + Or( + Map(Int(), (result: int) => Number(result)), ConcatR( + String("x"), + Map(Maybe(ConcatR( + String("^"), Int())), + (result: Option) => + if result.Some? then Unknown(result.value) else Unknown(1) + ))))), "factor" := RecursiveDef(1, (callback: ParserSelector) => - Map( - Concat( - callback("atom"), - Maybe(Concat(Or(Const?("*"), Const?("/")), - callback("factor")))), - buildParsedExpr - )), + Bind(callback("atom"), (atom: Expression) => + RepeatAcc( + Concat(Or(String("*"), Or(String("/"), String("%"))), + callback("atom")), + Expression.InfixBuilder(), atom) + ) + ), "term" := RecursiveDef(2, (callback: ParserSelector) => - Map( - Concat(callback("factor"), - Maybe(Concat(Or(Const?("+"), Const?("-")), - callback("term")))), - buildParsedExpr - )) + Bind(callback("factor"), (factor: Expression) => + RepeatAcc( + Concat(Or(String("+"), String("-")), + callback("factor")), + Expression.InfixBuilder(), factor) + ) + ) ], "term" - ); + ), EndOfString()) method Main(args: seq) { if |args| <= 1 { @@ -70,12 +112,19 @@ module ArithmeticParser { } for i := 1 to |args| { var input := args[i]; - Succeed_NonCrashingAuto(); - assume {:axiom} Valid(parser); // TODO: Prove - reveal Valid(); match parser(input) { - case PSuccess(result, _) => - print "result:", result.ToString(0), "\n"; + case PSuccess(result, remaining) => + if |remaining| != 0 { + print "'" + remaining +"'", "\n"; + PrintFailure(input, PFailure(Recoverable, FailureData("Expected end of string", remaining, Option.None))); + } + print "Computation:", result.ToString(0), "\n"; + match result.Distribute() { + case Success(x) => + print "Result:", x.ToString(0), "\n"; + case Failure(message) => + print message; + } case failure => PrintFailure(input, failure); } diff --git a/src/Parsers/library.dfy b/src/Parsers/library.dfy index 4befa241..defd3a49 100644 --- a/src/Parsers/library.dfy +++ b/src/Parsers/library.dfy @@ -1,4 +1,4 @@ -module {:options "/functionSyntax:4"} Printer { +module {:options "/functionSyntax:4"} StringNat { predicate IsStringNat(s: string) { |s| > 0 && (|s| > 1 ==> s[0] != '0') && diff --git a/src/Parsers/parser.dfy b/src/Parsers/parser.dfy index 7c8a9053..66ea635a 100644 --- a/src/Parsers/parser.dfy +++ b/src/Parsers/parser.dfy @@ -4,7 +4,7 @@ include "library.dfy" // Functional parsers are consuming the string from the left to the right. abstract module Parsers { import Wrappers - import Printer + import StringNat type Option = Wrappers.Option type C(!new, ==) // The character of the sequence being parsed @@ -162,6 +162,13 @@ abstract module Parsers { (input: seq) => PFailure(level, FailureData(message, input, Option.None)) } + opaque function EndOfString(): Parser<()> + { + (input: seq) => + if |input| == 0 then PSuccess((), input) + else PFailure(Recoverable, FailureData("expected end of string", input, Option.None)) + } + lemma Fail_NonCrashing(message: string) ensures Valid(Fail(message, Recoverable)) { reveal Fail(); reveal Valid(); } @@ -275,7 +282,7 @@ abstract module Parsers { if l.IsFailure() then if l.IsFatal() then l.PropagateFailure() else PSuccess((), input) - else PFailure(Recoverable, FailureData("Not failed", input, Option.None)) + else PFailure(Recoverable, FailureData("not failed", input, Option.None)) } opaque function And( @@ -351,6 +358,23 @@ abstract module Parsers { p.(remaining := input) } + opaque function ?(underlying: Parser): (p: Parser) + // If the underlying parser succeeds, returns its committed result + // if the underlying parser fails, + // - If the failure is fatal, returns it as-it + // - If the failure is recoverable, returns it without comitting the input + { + (input: seq) => + var p := underlying(input); + if p.IsFailure() then + if p.IsFatal() then + p + else + p.(data := FailureData(p.data.message, input, Option.None)) + else + p + } + opaque function If( condition: Parser, succeed: Parser @@ -434,7 +458,41 @@ abstract module Parsers { case PFailure(Fatal, data) => PFailure(Fatal, data) case PFailure(Recoverable, data) => - PSuccess(acc, input) + if |data.remaining| == |input| then + PSuccess(acc, input) + else + PFailure(Recoverable, data) + } + + opaque function RepeatAcc( + underlying: Parser, + combine: (A, B) -> A, + acc: A + ): Parser { + (input: seq) => RepeatAcc_(underlying, combine, acc, input) + } + + opaque function {:tailrecursion true} RepeatAcc_( + underlying: Parser, + combine: (A, B) -> A, + acc: A, + input: seq + ): (p: ParseResult) + decreases |input| + // Repeat the underlying parser over the input until a recoverable failure happens + // and returns the accumulated results + { + match underlying(input) + case PSuccess(result, remaining) => + if |remaining| >= |input| then PSuccess(acc, input) else + RepeatAcc_(underlying, combine, combine(acc, result), remaining) + case PFailure(Fatal, data) => + PFailure(Fatal, data) + case PFailure(Recoverable, data) => + if |data.remaining| == |input| then + PSuccess(acc, input) + else + PFailure(Recoverable, data) } opaque function Fixpoint( @@ -462,9 +520,9 @@ abstract module Parsers { if |remaining| < |input| then Fixpoint_(underlying, remaining) else if |remaining| == |input| then - PFailure(Recoverable, FailureData("No progress", remaining, Option.None)) + PFailure(Recoverable, FailureData("no progress", remaining, Option.None)) else - PFailure(Fatal, FailureData("Fixpoint called with an increasing remaining sequence", remaining, Option.None)); + PFailure(Fatal, FailureData("fixpoint called with an increasing remaining sequence", remaining, Option.None)); underlying(callback)(input) } opaque function FixpointMap( @@ -489,11 +547,12 @@ abstract module Parsers { // f = pos => underlying[fun](f, pos) decreases |input|, if fun in underlying then underlying[fun].order else 0 { - if fun !in underlying then PFailure(Fatal, FailureData("Parser '"+fun+"' not found", input, Option.None)) else + if fun !in underlying then PFailure(Fatal, FailureData("parser '"+fun+"' not found", input, Option.None)) else var RecursiveDef(orderFun, definitionFun) := underlying[fun]; var callback: ParserSelector := (fun': string) => + (var p : Parser := if fun' !in underlying.Keys then Fail(fun' + " not defined", Fatal) else @@ -502,11 +561,11 @@ abstract module Parsers { if |remaining| < |input| || (|remaining| == |input| && orderFun' < orderFun) then FixpointMap_(underlying, fun', remaining) else if |remaining| == |input| then - PFailure(Recoverable, FailureData("Non progressing recursive call requires that order of '" - +fun'+"' ("+Printer.natToString(orderFun')+") is lower than the order of '"+fun+"' ("+Printer.natToString(orderFun)+")", remaining, Option.None)) + PFailure(Recoverable, FailureData("non-progressing recursive call requires that order of '" + +fun'+"' ("+StringNat.natToString(orderFun')+") is lower than the order of '"+fun+"' ("+StringNat.natToString(orderFun)+")", remaining, Option.None)) else - PFailure(Fatal, FailureData("Parser did not return a suffix of the input", remaining, Option.None)) - ; + PFailure(Fatal, FailureData("parser did not return a suffix of the input", remaining, Option.None)) + ; p); definitionFun(callback)(input) } @@ -569,7 +628,7 @@ abstract module Parsers { else if pos' == pos then PFailure(Recoverable, "Non progressing recursive call requires that '"+fun'+"' be shorter than '"+fun+"'", pos', 0) else - PFailure(Fatal, "Parser did something unexpected, jump to position " + Printer.natToString(pos'), pos', 0) + PFailure(Fatal, "Parser did something unexpected, jump to position " + StringNat.natToString(pos'), pos', 0) ); if fun in underlying { assert {:only} FixMapSpecInner(fun, underlying.Keys, maxPos, callback, pos) by { @@ -856,7 +915,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { import opened ParserBuilders import opened Parsers import opened Wrappers - import opened Printer + import opened StringNat // Engine defines the following parsers: // Succeed(v) Always succeeds with the given value @@ -1038,7 +1097,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { { (pos: nat) requires pos <= |input| => if pos < |input| && test(pos) then PSuccess(pos + 1, input[pos]) - else PFailure(Recoverable, "Expected a different char but that's ok", pos) + else PFailure(Recoverable, "expected a different char but that's ok", pos) } ghost predicate ConstSpec(expected: string, p: Parser) { @@ -1365,7 +1424,7 @@ module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { requires result.PFailure? { var (line, lineNumber, charNumber) := LineContainingPos(result.pos); - result.message + " at position "+Printer.natToString(result.pos)+" line "+Printer.natToString(lineNumber)+", column "+Printer.natToString(charNumber)+":\n>"+ + result.message + " at position "+StringNat.natToString(result.pos)+" line "+StringNat.natToString(lineNumber)+", column "+StringNat.natToString(charNumber)+":\n>"+ line+"\n"+seq(charNumber, i => ' ')+"^\n" } method ReportError(p: ParseResult) @@ -1577,22 +1636,6 @@ abstract module ParserTests refines Parsers { reveal Map(); } - lemma AboutRepeat_( - underlying: Parser, - acc: seq, - input: seq - ) - // If underlying never throws a fatal error, - // returns a remaining that is a suffix of the input, - // then Repeat with always return a success - decreases |input| - requires Valid(underlying) - ensures Repeat_(underlying, acc, input).PSuccess? - { - reveal Repeat_(), Valid(); - assert IsRemaining(input, input[0..]); - } - predicate AboutRepeatIncreasesPosIfUnderlyingSucceedsAtLeastOnceEnsures( underlying: Parser, acc: seq, @@ -1607,27 +1650,6 @@ abstract module ParserTests refines Parsers { (|acc| < |result.result| && |result.remaining| < |input|)) } - lemma {:vcs_split_on_every_assert} AboutRepeatIncreasesPosIfUnderlyingSucceedsAtLeastOnce( - underlying: Parser, - acc: seq, - input: seq - ) - decreases |input| - requires Valid(underlying) - ensures - AboutRepeatIncreasesPosIfUnderlyingSucceedsAtLeastOnceEnsures - (underlying, acc, input) - { - reveal Repeat_(), Valid(); - var _ := input[0..]; - match underlying(input) - case PSuccess(result, remaining) => - if |remaining| < |input| { - AboutRepeatIncreasesPosIfUnderlyingSucceedsAtLeastOnce(underlying, acc + [result], remaining); - } - case _ => - } - predicate AboutFixpoint_Ensures( underlying: Parser -> Parser, input: seq) @@ -1663,64 +1685,60 @@ abstract module ParserTests refines Parsers { module StringParsers refines ParserTests { type C = char - opaque function Char(c: char): (p: Parser) + opaque function CharTest(test: char -> bool, name: string): (p: Parser) { (input: string) => - if 0 < |input| && input[0] == c then PSuccess(c, input[1..]) - else PFailure(Fatal, FailureData("Expected '"+[c]+"'", input, Option.None)) + if 0 < |input| && test(input[0]) then PSuccess(input[0], input[1..]) + else PFailure(Recoverable, + FailureData("expected a "+name + , input, Option.None)) } - opaque function Char?(expectedChar: char): (p: Parser) + opaque function Char(expectedChar: char): (p: Parser) { - CharTest?((c: char) => c == expectedChar, [expectedChar]) + CharTest((c: char) => c == expectedChar, [expectedChar]) } - opaque function CharTest?(test: char -> bool, name: string): (p: Parser) + opaque function Digit(): (p: Parser) { - (input: string) => - if 0 < |input| && test(input[0]) then PSuccess(input[0], input[1..]) - else PFailure(Recoverable, - FailureData("Expected a "+name+" but got " - + (if 0 < |input| then "'"+[input[0]]+"'" else "end of string") - , input, Option.None)) + CharTest(c => c in "0123456789", "digit") } - opaque function Digit(): (p: Parser) + opaque function DigitNumber(): (p: Parser) { - CharTest?(c => c in "0123456789", "digit") + Map(Digit(), (c: char) => + var n: nat := (if StringNat.IsStringNat([c]) then // Should always be true + StringNat.stringToNat([c]) + else 0); n + ) } opaque function Nat(): (p: Parser) { - Map(Repeat(Digit()), - result => - if Printer.IsStringNat(result) then // Should always be true - Printer.stringToNat(result) - else 0) + Bind(DigitNumber(), + (result: nat) => + RepeatAcc(DigitNumber(), + (previous: nat, c: nat) => + var r: nat := previous * 10 + c; r, + result + ) + ) } opaque function Int(): (p: Parser) { - Bind(Maybe(Char?('-')), + Bind(Maybe(Char('-')), (minusSign: Option) => - Map(Nat(), (result: nat) => if minusSign.Some? then -result else result)) - } - - opaque function Const(expected: string): (p: Parser) - { - (input: string) => - if |expected| <= |input| && input[0..|expected|] == expected then PSuccess(expected, input[|expected|..]) - else PFailure(Fatal, FailureData("Expected '"+expected+"'", input, Option.None)) + Map(Nat(), (result: nat) => if minusSign.Some? then 0-result else result)) } - opaque function Const?(expected: string): (p: Parser) + opaque function String(expected: string): (p: Parser) { (input: string) => if |expected| <= |input| && input[0..|expected|] == expected then PSuccess(expected, input[|expected|..]) - else PFailure(Recoverable, FailureData("Expected '"+expected+"'", input, Option.None)) + else PFailure(Recoverable, FailureData("expected '"+expected+"'", input, Option.None)) } - function repeat(str: string, n: nat): (r: string) ensures |r| == |str| * n { @@ -1730,16 +1748,75 @@ module StringParsers refines ParserTests { // TODO: Mention the error level, the line number, the column number // TODO: Extract only the line of interest - method PrintFailure(input: string, result: ParseResult) + method PrintFailure(input: string, result: ParseResult, printPos: int := -1) requires result.PFailure? + decreases result.data { + if printPos == -1 { + print "Error:\n"; + } var pos: int := |input| - |result.data.remaining|; // Need the parser to be Valid() if pos < 0 { // Could be proved false if parser is Valid() pos := 0; } - print input, "\n"; - print repeat(" ", pos), "^","\n"; + if printPos != pos { + print input, "\n"; + } + if printPos != pos { + print repeat(" ", pos), "^","\n"; + } print result.data.message; + if result.data.next.Some? { + print ", or\n"; + PrintFailure(input, PFailure(result.level, result.data.next.value), pos); + } else { + print "\n"; + } + } + +} + +// From these parsers, we can create displayers +// and prove the roundtrip displayer / parser if we wanted to +abstract module ParsersDiplayers { + import Parsers + + type Parser = Parsers.Parser + type C = Parsers.C + + type Displayer<-R> = (R, seq) -> seq + + function Concat( + left: Displayer, + right: Displayer + ): Displayer<(A, B)> { + (ab: (A, B), remaining: seq) => + var remaining2 := right(ab.1, remaining); + var remaining3 := left(ab.0, remaining2); + remaining3 } + ghost predicate Roundtrip(parse: Parser, display: Displayer) + // The parser and the displayer are dual to each other + // means that if we parse after printing, we get the same result + { + forall a: A, remaining: seq :: + parse(display(a, remaining)) == Parsers.PSuccess(a, remaining) + } + + lemma {:rlimit 200} ConcatRoundtrip( + pA: Parser, ppA: Displayer, + pB: Parser, ppB: Displayer + ) + requires Roundtrip(pA, ppA) && Roundtrip(pB, ppB) + ensures Roundtrip(Parsers.Concat(pA, pB), Concat(ppA, ppB)) + { + reveal Parsers.Concat(); + var p := Parsers.Concat(pA, pB); + var d := Concat(ppA, ppB); + forall ab: (A, B), remaining: seq ensures + p(d(ab, remaining)) == Parsers.PSuccess(ab, remaining) + { + } + } } \ No newline at end of file From 6b0b867172160faa1725b5a2d5e1fd25aca5ba08 Mon Sep 17 00:00:00 2001 From: Mikael Mayer Date: Tue, 14 Nov 2023 15:13:55 -0600 Subject: [PATCH 10/22] Refactored --- src/Parsers/exampleArithmetic.dfy | 157 ++- src/Parsers/library.dfy | 3 +- src/Parsers/parser.dfy | 1822 ----------------------------- src/Parsers/parserTests.dfy | 413 +++++++ src/Parsers/parsers.dfy | 550 +++++++++ src/Parsers/parsersDSL.dfy | 72 ++ src/Parsers/parsersDisplayers.dfy | 46 + src/Parsers/stringParsers.dfy | 142 +++ 8 files changed, 1324 insertions(+), 1881 deletions(-) delete mode 100644 src/Parsers/parser.dfy create mode 100644 src/Parsers/parserTests.dfy create mode 100644 src/Parsers/parsers.dfy create mode 100644 src/Parsers/parsersDSL.dfy create mode 100644 src/Parsers/parsersDisplayers.dfy create mode 100644 src/Parsers/stringParsers.dfy diff --git a/src/Parsers/exampleArithmetic.dfy b/src/Parsers/exampleArithmetic.dfy index 2b8808a9..f9e7456c 100644 --- a/src/Parsers/exampleArithmetic.dfy +++ b/src/Parsers/exampleArithmetic.dfy @@ -1,9 +1,92 @@ -include "parser.dfy" +include "stringParsers.dfy" module ArithmeticParser { import opened StringParsers import opened StringNat + // Pure functional style + const parser: Parser + := ConcatL( + RecursiveMap( + map[ + "atom" := + RecursiveDef(0, (callback: ParserSelector) => + Or(ConcatR( + String("("), ConcatL( + callback("term"), + String(")"))), + Or( + Map(Int(), (result: int) => Number(result)), ConcatR( + String("x"), + Map(Maybe(ConcatR( + String("^"), Int())), + (result: Option) => + if result.Some? then Unknown(result.value) else Unknown(1) + ))))), + "factor" := + RecursiveDef(1, (callback: ParserSelector) => + Bind(callback("atom"), (atom: Expression) => + Rep( + Concat(Or(String("*"), Or(String("/"), String("%"))), + callback("atom")), + Expression.InfixBuilder(), atom) + ) + ), + "term" := + RecursiveDef(2, (callback: ParserSelector) => + Bind(callback("factor"), (factor: Expression) => + Rep( + Concat(Or(String("+"), String("-")), + callback("factor")), + Expression.InfixBuilder(), factor) + ) + ) + ], + "term" + ), EndOfString()) + + // DSL style + const parserDSL: Parser + := ConcatL( + RecursiveMap( + map[ + "atom" := + RecursiveDef(0, (callback: ParserSelector) => + Or(ConcatR( + String("("), ConcatL( + callback("term"), + String(")"))), + Or( + Map(Int(), (result: int) => Number(result)), ConcatR( + String("x"), + Map(Maybe(ConcatR( + String("^"), Int())), + (result: Option) => + if result.Some? then Unknown(result.value) else Unknown(1) + ))))), + "factor" := + RecursiveDef(1, (callback: ParserSelector) => + Bind(callback("atom"), (atom: Expression) => + Rep( + Concat(Or(String("*"), Or(String("/"), String("%"))), + callback("atom")), + Expression.InfixBuilder(), atom) + ) + ), + "term" := + RecursiveDef(2, (callback: ParserSelector) => + Bind(callback("factor"), (factor: Expression) => + Rep( + Concat(Or(String("+"), String("-")), + callback("factor")), + Expression.InfixBuilder(), factor) + ) + ) + ], + "term" + ), EndOfString()) + + type Result = StringParsers.Wrappers.Result datatype Expression = @@ -11,27 +94,28 @@ module ArithmeticParser { | Number(value: int) | Unknown(power: int) { - function Distribute(): Result { + + function Simplify(): Result { match this { case Number(x: int) => Result.Success(this) case Binary(op, left, right) => - var l :- left.Distribute(); - var r :- right.Distribute(); + var l :- left.Simplify(); + var r :- right.Simplify(); if l.Number? && r.Number? then match op { case "+" => Result.Success(Number(l.value + r.value)) case "-" => Result.Success(Number(l.value - r.value)) case "*" => Result.Success(Number(l.value * r.value)) case "/" => - if r.value == 0 then Result.Failure("Division by zero (" + right.ToString(0) + " evaluates to zero)") else + if r.value == 0 then Result.Failure("Division by zero (" + right.ToString() + " evaluates to zero)") else Result.Success(Number(l.value / r.value)) case "%" => - if r.value == 0 then Result.Failure("Modulo by zero (" + right.ToString(0) + " evaluates to zero)") else + if r.value == 0 then Result.Failure("Modulo by zero (" + right.ToString() + " evaluates to zero)") else Result.Success(Number(l.value % r.value)) case _ => Result.Failure("Unsupported operator: " + op) } else - Result.Success(this) + Result.Success(Binary(op, l, r)) case Unknown(0) => Result.Success(Number(1)) case Unknown(_) => Result.Success(this) @@ -45,15 +129,14 @@ module ArithmeticParser { { (left: Expression, right: (string, Expression)) => Binary(right.0, left, right.1) } - function ToString(level: nat): string - requires level <= 2 + function ToString(): string { match this case Number(x) => (if x < 0 then "-" else "") + StringNat.natToString(if x < 0 then -x else x) case Binary(op, left, right) => - (match level case 0 => "(" case 1 => "[" case 2 => "{") - + left.ToString(level) + op + right.ToString(level) - + (match level case 0 => ")" case 1 => "]" case 2 => "}") + "(" + + left.ToString() + op + right.ToString() + + ")" case Unknown(power) => if power == 1 then "x" else if power == 0 then "1" else if power < 0 then "x^(-" + StringNat.natToString(0-power)+")" else @@ -66,46 +149,6 @@ module ArithmeticParser { if result.1.None? then result.0 else Binary(result.1.value.0, result.0, result.1.value.1)) - const parser: Parser - := ConcatL( - FixpointMap( - map[ - "atom" := - RecursiveDef(0, (callback: ParserSelector) => - Or(ConcatR( - String("("), ConcatL( - callback("term"), - String(")"))), - Or( - Map(Int(), (result: int) => Number(result)), ConcatR( - String("x"), - Map(Maybe(ConcatR( - String("^"), Int())), - (result: Option) => - if result.Some? then Unknown(result.value) else Unknown(1) - ))))), - "factor" := - RecursiveDef(1, (callback: ParserSelector) => - Bind(callback("atom"), (atom: Expression) => - RepeatAcc( - Concat(Or(String("*"), Or(String("/"), String("%"))), - callback("atom")), - Expression.InfixBuilder(), atom) - ) - ), - "term" := - RecursiveDef(2, (callback: ParserSelector) => - Bind(callback("factor"), (factor: Expression) => - RepeatAcc( - Concat(Or(String("+"), String("-")), - callback("factor")), - Expression.InfixBuilder(), factor) - ) - ) - ], - "term" - ), EndOfString()) - method Main(args: seq) { if |args| <= 1 { return; @@ -113,15 +156,15 @@ module ArithmeticParser { for i := 1 to |args| { var input := args[i]; match parser(input) { - case PSuccess(result, remaining) => + case Success(result, remaining) => if |remaining| != 0 { print "'" + remaining +"'", "\n"; - PrintFailure(input, PFailure(Recoverable, FailureData("Expected end of string", remaining, Option.None))); + PrintFailure(input, Failure(Recoverable, FailureData("Expected end of string", remaining, Option.None))); } - print "Computation:", result.ToString(0), "\n"; - match result.Distribute() { + print "Computation:", result.ToString(), "\n"; + match result.Simplify() { case Success(x) => - print "Result:", x.ToString(0), "\n"; + print "Result:", x.ToString(), "\n"; case Failure(message) => print message; } diff --git a/src/Parsers/library.dfy b/src/Parsers/library.dfy index defd3a49..e21500c2 100644 --- a/src/Parsers/library.dfy +++ b/src/Parsers/library.dfy @@ -1,5 +1,4 @@ -module {:options "/functionSyntax:4"} StringNat { - +module StringNat { predicate IsStringNat(s: string) { |s| > 0 && (|s| > 1 ==> s[0] != '0') && forall i | 0 <= i < |s| :: s[i] in "0123456789" diff --git a/src/Parsers/parser.dfy b/src/Parsers/parser.dfy deleted file mode 100644 index 66ea635a..00000000 --- a/src/Parsers/parser.dfy +++ /dev/null @@ -1,1822 +0,0 @@ -include "../Wrappers.dfy" -include "library.dfy" - -// Functional parsers are consuming the string from the left to the right. -abstract module Parsers { - import Wrappers - import StringNat - type Option = Wrappers.Option - - type C(!new, ==) // The character of the sequence being parsed - - datatype Either<+L, +R> = - // Type to return when using the Or parser - Left(l: L) | Right(r: R) - - datatype FailureData = FailureData(message: string, remaining: seq, next: Option) - { - function Concat(other: FailureData): FailureData { - if next == Option.None then this.(next := Option.Some(other)) - else - FailureData(message, remaining, Option.Some(next.value.Concat(other))) - } - } - - datatype FailureLevel = - // PFailure level for parse results. A Fatal error results in a unique FailurePosition - // and will be propagated to the top, - // while a Recoverable can be caught by a disjunctive pattern. - // For example, if the parser Const?() fails, then it returns a Recoverable, - // but the parser Const() will return an error. - Fatal | Recoverable - - datatype ParseResult<+R> = - // ParseResult is a failure-compatible type - | PFailure(level: FailureLevel, data: FailureData) - // Returned if a parser failed - | PSuccess(result: R, remaining: seq) - // Returned if a parser succeeds, with the increment in the position - { - function Remaining(): seq { - if PSuccess? then remaining else data.remaining - } - predicate IsFailure() { - PFailure? - } - - predicate IsFatalFailure() { - PFailure? && level == Fatal - } - - predicate IsFatal() - requires IsFailure() - { - level == Fatal - } - - function PropagateFailure(): ParseResult - requires IsFailure() - { - PFailure(level, data) - } - - function Extract(): (R, seq) - requires !IsFailure() - { - (result, remaining) - } - - function MapResult(f: R -> R'): ParseResult { - match this - case PSuccess(result, remaining) => - PSuccess(f(result), remaining) - case PFailure(level, data) => - PFailure(level, data) - } - - function MapRecoverableError(f: FailureData -> FailureData): ParseResult { - match this - case PFailure(Recoverable, data) => - PFailure(Recoverable, f(data)) - case _ => this - } - - function IfRecoverableFailureNoProgress(input: seq, right: Parser): ParseResult { - if PFailure? && level == Recoverable && !Committed(input, Remaining()) then - right(input) - else - this - } - } - - type Parser<+R> = seq -> ParseResult - // A parser is a total function from a position to a parse result - // Because it returns a delta pos, it cannot return a position negative from the origing - // If the parsing is out of context, it will return a failure. - - type ParserSelector = string -> Parser - // A parser selector is a function that, given a name that exists, - // returns a parser associated to this name - - predicate Committed(input: seq, remaining: seq) { - input != remaining - } - - predicate IsRemaining(input: seq, remaining: seq) - // Remaining is a suffix of the input - { - && |remaining| <= |input| - && input[|input|-|remaining|..] == remaining - } - - opaque ghost predicate Valid(underlying: Parser) - // A parser is valid for an input if it never returns a fatal error - // and always returns a suffix of its input - { - forall input: seq :: - && (underlying(input).PFailure? ==> underlying(input).level == Recoverable) - && IsRemaining(input, underlying(input).Remaining()) - } - - // Parser combinators. - // The following functions make it possible to create and compose parsers - // All these combinators provide non-crashing parsers if their inputs are noncrashing - - opaque function Succeed(result: R): (p: Parser) - // A parser that does not consume any input and returns the given value - // This is a generic function, it's better to use the Succeed function on strings. - { - (input: seq) => PSuccess(result, input) - } - - lemma Succeed_NonCrashing(result: R) - ensures Valid(Succeed(result)) - { reveal Valid(), Succeed(); } - - lemma Succeed_NonCrashingAuto() - ensures forall result: R :: Valid(Succeed(result)) - { reveal Valid(), Succeed(); } - - opaque function Epsilon(): (p: Parser<()>) - { - Succeed(()) - } - - lemma Epsilon_NonCrashing() - ensures Valid(Epsilon()) - { reveal Valid(), Epsilon(); Succeed_NonCrashing(()); } - - lemma AboutEpsilon_(input: seq) - ensures - var p := Epsilon(); - && p(input).PSuccess? - && p(input).remaining == input - { - reveal Epsilon(); - reveal Succeed(); - } - - opaque function Fail(message: string, level: FailureLevel := Recoverable): Parser - // A parser that does not consume any input and returns the given failure - { - (input: seq) => PFailure(level, FailureData(message, input, Option.None)) - } - - opaque function EndOfString(): Parser<()> - { - (input: seq) => - if |input| == 0 then PSuccess((), input) - else PFailure(Recoverable, FailureData("expected end of string", input, Option.None)) - } - - lemma Fail_NonCrashing(message: string) - ensures Valid(Fail(message, Recoverable)) - { reveal Fail(); reveal Valid(); } - - lemma Fail_NonCrashingAuto() - ensures forall message :: Valid(Fail(message, Recoverable)) - { reveal Fail(); reveal Valid(); } - - opaque function Bind( - left: Parser, - right: L -> Parser - ) : (p: Parser) - // Fails if the left parser fails. - // If the left parser succeeds, provides its result and its remaining - // to the right parser generator and returns its result applied to the remaining - // For a more general version, look at BindSucceeds and Bind___ - { - (input: seq) - => - var (leftResult, remaining) :- left(input); - right(leftResult)(remaining) - } - - opaque function BindSucceeds( - left: Parser, - right: (L, seq) -> Parser - ) : (p: Parser) - // Fails if the left parser fails. - // If the left parser succeeds, provides its result and its remaining - // to the right parser generator and returns its result applied to the remaining - // For a more general version, look at BindResult - { - (input: seq) - => - var (leftResult, remaining) :- left(input); - right(leftResult, remaining)(remaining) - } - - opaque function BindResult( - left: Parser, - right: (ParseResult, seq) -> Parser - ) : (p: Parser) - // Given a left parser and a parser generator based on the output - // of the left parser, returns the result of the second parser - { - (input: seq) - => - right(left(input), input)(input) - } - - ghost predicate BindRightNonCrashing(right: (L, seq) -> Parser) { - forall l: L, input: seq :: Valid(right(l, input)) - } - - lemma Bind_NonCrashing( - left: Parser, - right: (L, seq) -> Parser - ) - requires Valid(left) - requires BindRightNonCrashing(right) - ensures Valid(BindSucceeds(left, right)) - { - reveal BindSucceeds(), Valid(); - var p := BindSucceeds(left, right); - forall input: seq ensures - && (p(input).PFailure? ==> p(input).level == Recoverable) - && IsRemaining(input, p(input).Remaining()) - { - - } - } - - ghost predicate Bind_NonCrashingRight(left: Parser) - requires Valid(left) - { - forall right: (L, seq) -> Parser | BindRightNonCrashing(right) :: - Valid(BindSucceeds(left, right)) - } - - lemma Bind_NonCrashingAuto() - ensures forall left: Parser | Valid(left) :: - Bind_NonCrashingRight(left) - { - forall left: Parser | Valid(left), - right: (L, seq) -> Parser | BindRightNonCrashing(right) - ensures - Valid(BindSucceeds(left, right)) - { - Bind_NonCrashing(left, right); - } - } - - opaque function Map(underlying: Parser, mappingFunc: R -> U) - : (p: Parser) - // A parser combinator that makes it possible to transform the result of a parser in another one - // The mapping function can be partial - // ensures forall pos | MapSpec(size, underlying, mappingFunc, pos) :: - // p.requires(pos) - { - (input: seq) => - var (result, remaining) :- underlying(input); - var u := mappingFunc(result); - PSuccess(u, remaining) - } - - opaque function Not(underlying: Parser): Parser<()> - // Returns a parser that succeeds if the underlying - { - (input: seq) => - var l := underlying(input); - if l.IsFailure() then - if l.IsFatal() then l.PropagateFailure() - else PSuccess((), input) - else PFailure(Recoverable, FailureData("not failed", input, Option.None)) - } - - opaque function And( - left: Parser, - right: Parser - ) : (p: Parser<(L, R)>) - // Make the two parsers parse the same string, and return - // a pair of the two results, with the remaining of the right - { - (input: seq) => - var (l, remainingLeft) :- left(input); - var (r, remainingRight) :- right(input); - PSuccess((l, r), remainingRight) - } - - opaque function DisjunctiveOr( - left: Parser, - right: Parser - ) : (p: Parser>) - // left parses the string. If left succeeds, returns - // if left fails, two cases - // - If the error is recoverable and the parser did not consume input, - // then return what right returns - // - Otherwise return the first error - { - (input: seq) => - var p := Map(left, l => Left(l))(input); - p.IfRecoverableFailureNoProgress(input, - Map(right, r => Right(r))) - .MapRecoverableError(dataRight => - if p.IsFailure() && p.level.Recoverable? then - p.data.Concat(dataRight) - else - dataRight - ) - } - - opaque function Or( - left: Parser, - right: Parser - ) : (p: Parser) - // left parses the string. If left succeeds, returns - // if left fails, two cases - // - If the error is recoverable and the parser did not consume input, - // then return what right returns - // - Otherwise return the first error - { - (input: seq) => - var p := left(input); - p.IfRecoverableFailureNoProgress(input, right) - .MapRecoverableError(dataRight => - if p.IsFailure() && p.level.Recoverable? then - p.data.Concat(dataRight) - else - dataRight - ) - } - - opaque function Lookahead(underlying: Parser): (p: Parser) - // If the underlying parser succeeds, returns its result without committing the input - // if the underlying parser fails, - // - If the failure is fatal, returns it as-it - // - If the failure is recoverable, returns it without comitting the input - { - (input: seq) => - var p := underlying(input); - if p.IsFailure() then - if p.IsFatal() then - p - else - p.(data := FailureData(p.data.message, input, Option.None)) - else - p.(remaining := input) - } - - opaque function ?(underlying: Parser): (p: Parser) - // If the underlying parser succeeds, returns its committed result - // if the underlying parser fails, - // - If the failure is fatal, returns it as-it - // - If the failure is recoverable, returns it without comitting the input - { - (input: seq) => - var p := underlying(input); - if p.IsFailure() then - if p.IsFatal() then - p - else - p.(data := FailureData(p.data.message, input, Option.None)) - else - p - } - - opaque function If( - condition: Parser, - succeed: Parser - ) : (p: Parser) - // If the condifition fails, returns a non-committing failure - // Suitable to use in Or parsers - { - (input: seq) => - var (p, remaining) :- Lookahead(condition)(input); - succeed(input) - } - - opaque function Maybe(underlying: Parser): Parser> - { - (input: seq) => - var u := underlying(input); - if u.IsFatalFailure() then u.PropagateFailure() - else - if u.PSuccess? then u.MapResult(result => Option.Some(result)) - else PSuccess(Option.None, input) - } - - opaque function Concat( - left: Parser, - right: Parser - ) : (p: Parser<(L, R)>) - // Makes it possible to concatenate two consecutive parsers and return the pair of the results - { - (input: seq) - => - var (l, remaining) :- left(input); - var (r, remaining2) :- right(remaining); - PSuccess((l, r), remaining2) - } - - opaque function ConcatR( - left: Parser, - right: Parser - ) : (p: Parser) - // Return only the result of the right parser if the two parsers match - { - (input: seq) - => - var (l, remaining) :- left(input); - var (r, remaining2) :- right(remaining); - PSuccess(r, remaining2) - } - - opaque function ConcatL( - left: Parser, - right: Parser - ) : (p: Parser) - // Return only the result of the right parser if the two parsers match - { - (input: seq) - => - var (l, remaining) :- left(input); - var (r, remaining2) :- right(remaining); - PSuccess(l, remaining2) - } - - opaque function Repeat( - underlying: Parser - ): Parser> { - (input: seq) => Repeat_(underlying, [], input) - } - - opaque function {:tailrecursion true} Repeat_( - underlying: Parser, - acc: seq, - input: seq - ): (p: ParseResult>) - decreases |input| - // Repeat the underlying parser over the input until a recoverable failure happens - // and returns the accumulated results - { - match underlying(input) - case PSuccess(result, remaining) => - if |remaining| >= |input| then PSuccess(acc + [result], input) else - Repeat_(underlying, acc + [result], remaining) - case PFailure(Fatal, data) => - PFailure(Fatal, data) - case PFailure(Recoverable, data) => - if |data.remaining| == |input| then - PSuccess(acc, input) - else - PFailure(Recoverable, data) - } - - opaque function RepeatAcc( - underlying: Parser, - combine: (A, B) -> A, - acc: A - ): Parser { - (input: seq) => RepeatAcc_(underlying, combine, acc, input) - } - - opaque function {:tailrecursion true} RepeatAcc_( - underlying: Parser, - combine: (A, B) -> A, - acc: A, - input: seq - ): (p: ParseResult) - decreases |input| - // Repeat the underlying parser over the input until a recoverable failure happens - // and returns the accumulated results - { - match underlying(input) - case PSuccess(result, remaining) => - if |remaining| >= |input| then PSuccess(acc, input) else - RepeatAcc_(underlying, combine, combine(acc, result), remaining) - case PFailure(Fatal, data) => - PFailure(Fatal, data) - case PFailure(Recoverable, data) => - if |data.remaining| == |input| then - PSuccess(acc, input) - else - PFailure(Recoverable, data) - } - - opaque function Fixpoint( - underlying: Parser -> Parser - ): (p: Parser) - // Given a function that requires a parser and a position to return a parse result, - // provide this function the Fixpoint() parser itself - // so that it makes it possible to iteratively parse the result - { - (input: seq) => Fixpoint_(underlying, input) - } - - opaque function Fixpoint_( - underlying: Parser -> Parser, - input: seq - ): (p: ParseResult) - // Given a function that combines a (recursive) parser and a position to obtain a parse result, - // returns the parse result associated to recursively applying the function. - // If partially applied on "underlying" and "size", it would returns the solution to the equation: - // f = pos => underlying(f, pos) - decreases |input| - { - var callback: Parser := - (remaining: seq) => - if |remaining| < |input| then - Fixpoint_(underlying, remaining) - else if |remaining| == |input| then - PFailure(Recoverable, FailureData("no progress", remaining, Option.None)) - else - PFailure(Fatal, FailureData("fixpoint called with an increasing remaining sequence", remaining, Option.None)); - underlying(callback)(input) - } - opaque function FixpointMap( - underlying: map>, - fun: string): (p: Parser) - { - (input: seq) => FixpointMap_(underlying, fun, input) - } - - datatype RecursiveDef = RecursiveDef( - order: nat, - definition: ParserSelector -> Parser - ) - opaque function FixpointMap_( - underlying: map>, - fun: string, - input: seq - ): (p: ParseResult) - // Given a function that combines a (recursive) parser selector and a position to obtain a parse result, - // returns the parse result associated to recursively applying the function. - // If partially applied on "underlying" and "fun", it would return the solution f<"fun"> to the equations: - // f = pos => underlying[fun](f, pos) - decreases |input|, if fun in underlying then underlying[fun].order else 0 - { - if fun !in underlying then PFailure(Fatal, FailureData("parser '"+fun+"' not found", input, Option.None)) else - var RecursiveDef(orderFun, definitionFun) := underlying[fun]; - var callback: ParserSelector - := - (fun': string) => - (var p : Parser := - if fun' !in underlying.Keys then - Fail(fun' + " not defined", Fatal) - else - var RecursiveDef(orderFun', definitionFun') := underlying[fun']; - (remaining: seq) => - if |remaining| < |input| || (|remaining| == |input| && orderFun' < orderFun) then - FixpointMap_(underlying, fun', remaining) - else if |remaining| == |input| then - PFailure(Recoverable, FailureData("non-progressing recursive call requires that order of '" - +fun'+"' ("+StringNat.natToString(orderFun')+") is lower than the order of '"+fun+"' ("+StringNat.natToString(orderFun)+")", remaining, Option.None)) - else - PFailure(Fatal, FailureData("parser did not return a suffix of the input", remaining, Option.None)) - ; p); - definitionFun(callback)(input) - } - - /* - ghost predicate FixMapSpecInnerInner( - fun: string, fun': string, functions: set, maxPos: nat, callback: ParserSelector, u: nat) - { - forall u': nat | u < u' <= maxPos || (u == u' && |fun'| < |fun|) :: - && callback(fun').Some? - && var x := callback(fun').value(u'); - && (x.PSuccess? ==> u' + x.deltaPos <= maxPos) - } - - ghost predicate FixMapSpecInner(fun: string, functions: set, maxPos: nat, callback: ParserSelector, u: nat) - // Specification for FixpointMap. - // Ensures that, for any other function, if this function is in the set of admissible `functions`, - // then callback should not only accept it, but then accept any position at a second argument if - // 1) This position is strictly greater than the current position u - // 2) Or this position is the same but the function name is smaller. - { - && u <= maxPos - && forall fun': string <- functions :: - FixMapSpecInnerInner(fun, fun', functions, maxPos, callback, u) - } - - ghost predicate FixpointMapSpecOnce(fun': string, impl: ParserMapper, otherFuns: set, maxPos: nat) - { - forall callback: ParserSelector, u: nat | - && FixMapSpecInner(fun', otherFuns, maxPos, callback, u) - :: var x := impl(callback, u); - && (x.PSuccess? ==> u + x.deltaPos <= maxPos) - } - - lemma AboutFixpointMap_( - maxPos: nat, - underlying: map, nat) -> ParseResult>, - fun: string, - pos: nat - ) - requires pos <= maxPos - requires - forall fun' <- underlying.Keys :: - FixpointMapSpecOnce(fun', underlying[fun'], underlying.Keys, maxPos) - ensures - AboutFixpointMap_Ensures(maxPos, underlying, fun, pos) - { - reveal FixpointMap_(); - var p := FixpointMap_(maxPos, underlying, fun, pos); - - var callback: ParserSelector - := - (fun': string) => - if fun' !in underlying.Keys then - None - else - Some( - (pos': nat) => - if pos < pos' <= maxPos || (pos' == pos && |fun'| < |fun|) then - FixpointMap_(maxPos, underlying, fun', pos') - else if pos' == pos then - PFailure(Recoverable, "Non progressing recursive call requires that '"+fun'+"' be shorter than '"+fun+"'", pos', 0) - else - PFailure(Fatal, "Parser did something unexpected, jump to position " + StringNat.natToString(pos'), pos', 0) - ); - if fun in underlying { - assert {:only} FixMapSpecInner(fun, underlying.Keys, maxPos, callback, pos) by { - assume && pos <= maxPos - && forall fun': string <- underlying.Keys :: - FixMapSpecInnerInner(fun, fun', underlying.Keys, maxPos, callback, pos); - assume false; - } - assert p == underlying[fun](callback, pos); - assert FixpointMapSpecOnce(fun, underlying[fun], underlying.Keys, maxPos); - var impl := underlying[fun]; - assert forall callback: ParserSelector, u: nat | - && FixMapSpecInner(fun, underlying.Keys, maxPos, callback, u) - :: var x := impl(callback, u); - && (x.PSuccess? ==> u + x.deltaPos <= maxPos); - assert p.PSuccess? ==> pos + p.deltaPos <= maxPos; - } else { - } - } - - ghost predicate OrSpec( - size: nat, - left: Parser, - right: Parser, - pos: nat) - // Verifies that the two parsers can both be applied at the given position - { - && left.requires(pos) - && (left(pos).PSuccess? ==> pos <= left(pos).pos <= size) - && right.requires(pos) - && (right(pos).PSuccess? ==> pos <= right(pos).pos <= size) - // Since requires are only used for progression, I don'result have a use case in which the following is useful - /*&& var l := left(pos); - && (l.PFailure? && l.level == Recoverable ==> right.requires(pos))*/ - } - - opaque function Or_( - ghost size: nat, - left: Parser, - right: Parser - ) : (p: Parser) - // Builds a parser from left and right such that, if left fails and is recoverable, then right is used instead. - ensures forall pos: nat | OrSpec(size, left, right, pos) - :: p.requires(pos) - && (p(pos).PSuccess? ==> pos <= p(pos).pos <= size) - { - (pos: nat) requires OrSpec(size, left, right, pos) - => - match left(pos) - case PFailure(Recoverable, message, pos') => right(pos) - case PFailure(Fatal, message, pos') => PFailure(Fatal, message, pos') - case PSuccess(pos, l) => PSuccess(pos, l) - } - - ghost predicate EitherPSpec(size: nat, left: Parser, right: Parser, pos: nat) { - && left.requires(pos) - && (left(pos).PSuccess? ==> pos <= left(pos).pos <= size) - && (left(pos).PFailure? && left(pos).level == Recoverable ==> - right.requires(pos) - && (right(pos).PSuccess? ==> pos <= right(pos).pos <= size)) - } - - opaque function EitherP_( - ghost size: nat, - left: Parser, - right: Parser - ) : (p: Parser>) - // Same as 'Or' but supports returning two heterogeneous values - ensures forall pos: nat | - EitherPSpec(size, left, right, pos) - :: p.requires(pos) - && (p(pos).PSuccess? ==> pos <= p(pos).pos <= size) - { - (pos: nat) - requires EitherPSpec(size, left, right, pos) - => - match left(pos) - case PFailure(Recoverable, message, pos') => right(pos).MapResult(r => Right(r)) - case PFailure(Fatal, message, pos') => PFailure(Fatal, message, pos') - case PSuccess(pos, l) => PSuccess(pos, Left(l)) - } -} - -module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserTheorems { - import opened Parsers - // The remaining are interesting proofs about some equivalence but are not useful - - opaque function Map__(ghost size: nat, underlying: Parser, mappingFunc: R --> U) - : (p: Parser) - // Map is equivalent to a bind method: - ensures forall pos: nat | MapSpec(size, underlying, mappingFunc, pos) - :: p.requires(pos) - { - var p := BindSucceeds(size, underlying, (result: R, pos': nat) requires mappingFunc.requires(result) => Succeed(size, mappingFunc(result))); - assert forall pos: nat | MapSpec(size, underlying, mappingFunc, pos) :: - p.requires(pos) by { - forall pos: nat | MapSpec(size, underlying, mappingFunc, pos) - ensures p.requires(pos) - { - AboutMap_(size, underlying, mappingFunc, pos); - var left := underlying; - var right := (result: R, pos': nat) requires mappingFunc.requires(result) => Succeed(size, mappingFunc(result)); - assert BindSpec(size, left, right, pos); - } - } - p - } - - lemma Map_Map2(size: nat, underlying: Parser, mappingFunc: R --> U, pos: nat) - requires MapSpec(size, underlying, mappingFunc, pos) - ensures - && Map__(size, underlying, mappingFunc)(pos) == Map(size, underlying, mappingFunc)(pos) - { - reveal Map(); - reveal Map__(); - reveal BindSucceeds(); - reveal Succeed(); - } - - opaque function Concat_(ghost size: nat, left: Parser, right: Parser) - : (p: Parser<(R, U)>) - // Concat is equivalent to two binds methods - ensures forall pos: nat | ConcatSpec_(size, left, right, pos) - :: p.requires(pos) - { - BindSucceeds(size, left, (result: R, pos': nat) requires right.requires(pos') => - BindSucceeds(size, right, (u: U, pos'': nat) => Succeed(size, (result, u)))) - } - - lemma ConcatConcat2(size: nat, left: Parser, right: Parser, pos: nat) - requires ConcatSpec_(size, left, right, pos) - ensures BindSpec(size, left, (result: R, pos': nat) requires right.requires(pos') => - BindSucceeds(size, right, (u: U, pos'': nat) => Succeed(size, (result, u))), pos) - // TODO: Bug to report. Concat() should not be needed - ensures Concat(size, left, right)(pos) == Concat_(size, left, right)(pos) - { - reveal BindSucceeds(); - reveal Concat(); - reveal Concat_(); - reveal Succeed(); - } -} - -// Nice DSL to build parsers -// B(p) returns a parser builder from a normal parser. -// B1.o_I(B2) will parse both but return the result of B2 -// B1.I_o(B2) will parse both but return the result of B1 -// B.M(f) will map the result of the parser builder by f if succeeded -// B1.O(B2) will either parse B1, or B2 if B1 fails with Recoverable -// FirstOf([B1, B2, B3]) -// will parse with B1, but if B1 fails with Recoverable, -// it will parse with B2, but if B2 fails with Recoverable, -// it will parse with B3 -// R(v) returns a parser builder that returns immediately v -// -// There are more parser builders in the trait Engine, when their spec depends on -// a predetermined input, e.g. to tests for constant strings - -module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserBuilders { - import opened Parsers - //import opened ParserEngine - - // Wrap the constructor in a class where the size is constant so that users - // don'result need to provide it. - datatype ParserBuilder = B_(ghost size: nat, apply: Parser) - { - opaque function o_I(other: ParserBuilder): (p: ParserBuilder) - requires size == other.size - ensures p.size == size - ensures forall pos: nat | - ConcatSpec_(size, apply, other.apply, pos) - :: p.apply.requires(pos) - && (p.apply(pos).PSuccess? ==> pos <= p.apply(pos).pos <= size) - { - B_(size, ConcatR(size, apply, other.apply)) - } - opaque function I_o(other: ParserBuilder): (p: ParserBuilder) - requires size == other.size - ensures p.size == size - ensures forall pos: nat | - ConcatSpec_(size, apply, other.apply, pos) - :: p.apply.requires(pos) - && (p.apply(pos).PSuccess? ==> pos <= p.apply(pos).pos <= size) - { - B_(size, ConcatL(size, apply, other.apply)) - } - opaque function M(mappingFunc: R --> U): (p: ParserBuilder) - ensures p.size == size - ensures forall pos: nat | - MapSpec(size, apply, mappingFunc, pos) - :: p.apply.requires(pos) - && (p.apply(pos).PSuccess? ==> pos <= p.apply(pos).pos <= size) - { - B_(size, Map(size, apply, mappingFunc)) - } - opaque function O(other: ParserBuilder): (p: ParserBuilder) - requires size == other.size - ensures size == p.size - ensures forall pos: nat | - OrSpec(size, apply, other.apply, pos) - :: p.apply.requires(pos) - && (p.apply(pos).PSuccess? ==> pos <= p.apply(pos).pos <= size) - { - B_(size, Or_(size, apply, other.apply)) - } - - opaque function Then(other: (R, nat) --> ParserBuilder): (p: ParserBuilder) - ensures size == p.size - ensures forall pos: nat | - BindSpec(size, apply, (result: R, pos': nat) requires other.requires(result, pos') => other(result, pos').apply, pos) - :: p.apply.requires(pos) - && ( - p.apply(pos).PSuccess? ==> - && apply(pos).PSuccess? - && apply(pos).pos <= p.apply(pos).pos <= size) - { - B_(size, BindSucceeds(size, apply, (result: R, pos': nat) requires other.requires(result, pos') => other(result, pos').apply)) - } - - opaque function Repeat(init: R, combine: (R, R) -> R): (p: ParserBuilder) - ensures size == p.size - ensures forall pos: nat | pos <= size && RepeatSpec(apply, pos, size-pos, size) :: - p.apply.requires(pos) - && (p.apply(pos).PSuccess? ==> pos <= p.apply(pos).pos <= size) - - { - B_(size, - (pos: nat) - requires pos <= size - requires RepeatSpec(apply, pos, size-pos, size) - => RepeatAcc_(apply, pos, init, combine, size)) - } - } - opaque function FirstOf_(ghost size: nat, others: seq>): (p: ParserBuilder) - requires |others| > 0 - requires forall other <- others :: other.size == size - ensures p.size == size - ensures - forall pos: nat | - forall pp | pp in others :: pp.apply.requires(pos) && (pp.apply(pos).PSuccess? ==> pos <= pp.apply(pos).pos <= size) - :: - p.apply.requires(pos) && (p.apply(pos).PSuccess? ==> pos <= p.apply(pos).pos <= size) - { - if |others| == 1 then others[0] - else - var p := others[0].O(FirstOf_(size, others[1..])); - assert forall pos: nat | - forall pp | pp in others :: pp.apply.requires(pos) && (pp.apply(pos).PSuccess? ==> pos <= pp.apply(pos).pos <= size) - :: - p.apply.requires(pos) && (p.apply(pos).PSuccess? ==> pos <= p.apply(pos).pos <= size) by { - } - p - } - function R_(ghost size: nat, result: R): (p: ParserBuilder) - { - B_(size, Succeed(size, result)) - } - datatype FixMapParserBuilder = FixMapParserBuilder(ghost size: nat, ghost functions: set, underlying: map> := map[]) - { - static function Empty(ghost size: nat, ghost functions: set): (b: FixMapParserBuilder) ensures b.Valid() { - FixMapParserBuilder(size, functions, map[]) - } - ghost predicate Valid() { - forall fun <- underlying :: FixpointMapSpecOnce(fun, underlying[fun], functions, size) - } - opaque function Add(name: string, mapper: ParserMapper): (f: FixMapParserBuilder) - requires Valid() - requires name !in underlying - requires FixpointMapSpecOnce(name, mapper, functions, size) - ensures f.Valid() - ensures f.functions == functions - ensures f.size == size - ensures name in f.underlying - ensures this.underlying.Keys + {name} == f.underlying.Keys - { - this.(underlying := underlying[name := mapper]) - } - } -} - -// Defines an "Engine" trait to be extended, which gives access to more -// parser combinators and parser builders that require access to an input string -module {:options "-functionSyntax:4", "-quantifierSyntax:4"} ParserEngine { - import opened ParserBuilders - import opened Parsers - import opened Wrappers - import opened StringNat - - // Engine defines the following parsers: - // Succeed(v) Always succeeds with the given value - // Bind(l, r) If l succeeds, returns the result of running r on the result - // Epsilon Always succeeds and returns () - // Map(p, f) If p succeeds, maps its result with f - // Concat(l, r) if l and r succeed consecutively, pair their results - // ConcatL(l, r) if l and r succeed consecutively, returns the value of l - // ConcatR(l, r) if l and r succeed consecutively, returns the value of r - // Or(l, r) Returns the first of l or r which succeeds - // EitherP(l, r) Returns the first of l or r which succeeds, wrapped in Either type0 - // Rep(parser) repeats the parser as much as possible and returns the sequence of results - // Fix((result, pos) => parseResult) returns a parser that recursively applies the provided function when needed - // FixMap((result, pos) => parseResult) Same as fix but can provide a mapping from string to functions instead of a single function - // EOS Succeeds if we reached the end of the string, fails with Fatal otherwise - // FirstOf([p*]) returns the first parser which succeeds - // Maybe(p) If p succeeds, returns Some(p.result), otherwise returns None if p fails with Recoverable - // DebugParser(msg, p) Prints the given message and pass through p - // - // Engine defines the following parser builders (please avoid them because they are not performant!) - // C("string") fails with Fatal if "string" is not at the given position. - // C?("string") fails with Recoverable if "string" is not at the given position. - // B(p) wraps a regular parser - // R(v) A parser builder that returns the given value - trait {:termination false} Engine { - const input: string - - opaque function Succeed(result: R): (p: Parser) - ensures forall pos: nat | pos <= |input| :: p.requires(pos) && p(pos).PSuccess? && pos == p(pos).pos <= |input| - // A parser that does not consume any input and returns the given value - { - (pos: nat) requires pos <= |input| => PSuccess(pos, result) - } - - opaque function Bind( - left: Parser, - right: (L, nat) --> Parser - ) : (p: Parser) - // A parser such that, if the left parser succeeds, then the right parser is obtained by using the result of the left parser - ensures forall pos: nat | BindSpec(|input|, left, right, pos) - :: p.requires(pos) - && (p(pos).PSuccess? ==> - && left(pos).PSuccess? - && left(pos).pos <= p(pos).pos <= |input| - && p(pos) == right(left(pos).result, left(pos).pos)(left(pos).pos)) - { - BindSucceeds(|input|, left, right) - } - - opaque function Epsilon(pos: nat): (pr: ParseResult<()>) - requires pos <= |input| - ensures pr.PSuccess? && pr.pos == pos - { - Epsilon(|input|)(pos) - } - - opaque function Map(underlying: Parser, mappingFunc: R --> U) - : (p: Parser) - // A parser combinator that makes it possible to transform the result of a parser in another one - // The mapping function can be partial - ensures forall pos: nat | MapSpec(|input|, underlying, mappingFunc, pos) - :: && p.requires(pos) - && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) - { - Map(|input|, underlying, mappingFunc) - } - - ghost predicate MapFailureSpec(pos: nat, underlying: Parser) { - pos <= |input| && underlying.requires(pos) - } - - opaque function MapFailure(underlying: Parser, mappingFunc: ParseResult --> ParseResult) - : (p: Parser) - requires forall p: ParseResult | p.PFailure? :: mappingFunc.requires(p) && mappingFunc(p).PFailure? - requires forall pos: nat | pos <= |input| :: - && underlying.requires(pos) - && (underlying(pos).PSuccess? ==> pos <= underlying(pos).pos <= |input|) - ensures forall pos: nat | MapFailureSpec(pos, underlying) :: - && p.requires(pos) - //&& (p(pos).PSuccess? <==> underlying(pos).PSuccess?) - && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) - { - (pos: nat) requires pos <= |input| => - var r := underlying(pos); - if r.PSuccess? then r else - mappingFunc(r) - } - - ghost predicate ConcatSpec(left: Parser, right: Parser, pos: nat) { - ConcatSpec_(|input|, left, right, pos) - } - - opaque function Concat( - left: Parser, - right: Parser - ) : (p: Parser<(L, R)>) - // Makes it possible to concatenate two consecutive parsers and return the pair of the results - ensures forall pos: nat | - ConcatSpec_(|input|, left, right, pos) - :: p.requires(pos) - && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) - { - Concat(|input|, left, right) - } - - opaque function ConcatR( - left: Parser, - right: Parser - ) : (p: Parser) - // Makes it possible to concatenate two consecutive parsers and return the pair of the results - ensures forall pos: nat | - ConcatSpec_(|input|, left, right, pos) - :: p.requires(pos) - && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) - { - ConcatR(|input|, left, right) - } - - opaque function ConcatL( - left: Parser, - right: Parser - ) : (p: Parser) - // Makes it possible to concatenate two consecutive parsers and return the pair of the results - ensures forall pos: nat | - ConcatSpec_(|input|, left, right, pos) - :: p.requires(pos) - && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) - { - ConcatL(|input|, left, right) - } - - opaque function Or( - left: Parser, - right: Parser - ) : (p: Parser) - // Builds a parser from left and right such that, if left fails and is recoverable, then right is used instead. - ensures forall pos: nat | - OrSpec(|input|, left, right, pos) - :: p.requires(pos) - && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) - { - Or_(|input|, left, right) - } - - opaque function EitherP( - left: Parser, - right: Parser - ) : (p: Parser>) - // Same as 'Or' but supports returning two heterogeneous values - ensures forall pos: nat | - EitherPSpec(|input|, left, right, pos) - :: p.requires(pos) - && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) - { - EitherP_(|input|, left, right) - } - - // Returns a function that tests if, at the given position, we can find the string toTest - opaque function TestString(toTest: string): (test: nat --> bool) - ensures forall pos: nat | pos <= |input| :: test.requires(pos) - { - (pos: nat) requires pos <= |input| => - pos + |toTest| <= |input| && input[pos..pos+|toTest|] == toTest - } - - // Returns a function that tests if, at the given position, we can find the string toTest - opaque function TestNotString(toTest: string): (test: nat --> bool) - ensures forall pos: nat | pos <= |input| :: test.requires(pos) - { - (pos: nat) requires pos <= |input| => - !(pos + |toTest| <= |input| && input[pos..pos+|toTest|] == toTest) - } - - opaque function CharTest?(test: nat --> bool): (p: Parser) - requires forall pos: nat | pos < |input| :: test.requires(pos) - ensures forall pos: nat | pos <= |input| :: - p.requires(pos) && - (p(pos).PSuccess? ==> pos < |input| && pos + 1 == p(pos).pos) - { - (pos: nat) requires pos <= |input| => - if pos < |input| && test(pos) then PSuccess(pos + 1, input[pos]) - else PFailure(Recoverable, "expected a different char but that's ok", pos) - } - - ghost predicate ConstSpec(expected: string, p: Parser) { - forall pos: nat :: - && p.requires(pos) - && (p(pos).PSuccess? ==> - pos + |expected| <= |input| && p(pos).pos == pos + |expected|) - } - - opaque function C(expected: string): (p: ParserBuilder) - ensures p.size == |input| - ensures ConstSpec(expected, p.apply) - { - B_(|input|, Const(expected)) - } - opaque function C?(expected: string): (p: ParserBuilder) - ensures p.size == |input| - ensures ConstSpec(expected, p.apply) - { - B_(|input|, Const?(expected)) - } - function B(underlying: Parser): (p: ParserBuilder) - ensures p.size == |input| - { - B_(|input|, underlying) - } - - opaque function Maybe(underlying: Parser): (p: Parser>) - requires IsRegular(underlying) - ensures IsRegular(p) - { - Or(Map(underlying, (result: R) => Some(result)), Succeed(None)) - } - - opaque function Newline(): (p: Parser) - ensures IsRegular(p) - { - Or(Const?("\r\n"), Or(Const?("\r"), Const("\n"))) - } - - opaque function Test?(test: (string, nat) --> bool): (p: Parser<()>) - requires forall pos: nat | pos <= |input| :: test.requires(input, pos) - ensures forall pos: nat | pos <= |input| :: - p.requires(pos) && - (p(pos).PSuccess? ==> pos <= p(pos).pos) - { - (pos: nat) requires pos <= |input| => - if test(input, pos) then PSuccess(pos, ()) else PFailure(Recoverable, "Test failed", pos) - } - - // Given a test on a position, returns a parser that succeeds with the longest string - // starting at a given position which succeeds the test on every character - // If the test succeeds immediately, returns a recoverable failure instead - opaque function While?(test: nat --> bool): (p: Parser) - requires forall pos: nat | pos <= |input| :: test.requires(pos) - ensures forall pos: nat | pos <= |input| :: p.requires(pos) - && (p(pos).PSuccess? ==> pos < p(pos).pos <= |input|) - { - var p := Bind(Rep(CharTest?(test)), - (result: string, pos': nat) => - if result == "" then Fail("Did not find an non-empty string satisfying test", Recoverable) - else Succeed(result)); - assert forall pos: nat | pos <= |input| :: p.requires(pos) - && (p(pos).PSuccess? ==> pos < p(pos).pos <= |input|) by { - forall pos : nat | pos <= |input| ensures p.requires(pos) - && (p(pos).PSuccess? ==> pos < p(pos).pos <= |input|){ - assert p.requires(pos); - if(p(pos).PSuccess?) { - RepDoesIncreasePosSometimes(CharTest?(test), pos); - } - } - } - p - } - - opaque function EverythingUntilAndDrop(str: string): (p: Parser) - ensures forall pos: nat | pos <= |input| :: - p.requires(pos) - && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) - { - ConcatL(While?(TestNotString(str)), Const(str)) - } - - - ghost predicate RepSpec(underlying: Parser, pos: nat) { - && pos <= |input| - && (forall pos' | pos <= pos' <= |input| :: - && underlying.requires(pos') - && (underlying(pos').PSuccess? ==> pos' <= underlying(pos').pos <= |input|)) - } - - opaque function Rep(underlying: Parser): (p: Parser>) - ensures forall pos: nat | RepSpec(underlying, pos) :: - p.requires(pos) - && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) - { - (pos: nat) - requires RepSpec(underlying, pos) - => - Repeat0(underlying, pos, [], |input|) - } - - lemma RepDoesIncreasePosSometimes(underlying: Parser, pos: nat) - requires pos <= |input| && RepeatSpec(underlying, pos, |input|-pos, |input|) - requires underlying.requires(pos) && underlying(pos).PSuccess? ==> - pos < underlying(pos).pos - ensures - var p := Rep(underlying); - (p(pos).PSuccess? && |p(pos).result| > 0 ==> pos < p(pos).pos) - { - reveal Rep(); - reveal Repeat0(); - } - - opaque function RepAcc( - underlying: Parser, - init: I, - combine: (I, R) -> I - ): (p: Parser) - ensures forall pos: nat | RepSpec(underlying, pos) :: - && p.requires(pos) - && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) - { - (pos: nat) requires RepSpec(underlying, pos) => - RepeatAcc_(underlying, pos, init, combine, |input|) - } - - ghost predicate FixSpec(size: nat, underlying: (Parser, nat) --> ParseResult, pos: nat) { - && pos <= size - && forall callback: Parser, u: nat | - FixSpecInner(size, callback, u) :: - underlying.requires(callback, u) - && (underlying(callback, u).PSuccess? ==> u <= underlying(callback, u).pos <= size) - } - function GetFixBase(): map> { map[] } - - opaque function Fix(underlying: (Parser, nat) --> ParseResult): (p: Parser) - ensures forall pos: nat| FixSpec(|input|, underlying, pos) - :: p.requires(pos) - && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) - { - (pos: nat) - requires FixSpec(|input|, underlying, pos) - => - Fixpoint_( - |input|, - underlying, - pos - ) - } - // TODO: Dafny clinic. Cannot make it opaque, otherwise - // even with reveal cannot verify other functions - ghost predicate FixMapInnerOnce(fun: string, impl: ParserMapper, otherFuns: set, size: nat) - { - FixpointMapSpecOnce(fun, impl, otherFuns, |input|) - } - - ghost predicate FixMapInner(size: nat, underlying: map>) { - forall fun: string <- underlying.Keys :: - FixMapInnerOnce(fun, underlying[fun], underlying.Keys, size) - } - - opaque function FixMap( - underlying: map>, - fun: string - ): (p: Parser) - requires {:error "Second argument of FixMap might not be a key of the first"} fun in underlying.Keys - requires {:error "First argument might not satisfy FixMapInner(|input|, arg). Said otherwise, key/value pairs of the first argument might not all satisfy FixMapInnerOnce(key, value, keys, |input|)"} FixMapInner(|input|, underlying) - ensures forall pos: nat | pos <= |input| :: - && p.requires(pos) - && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) - { - (pos: nat) requires pos <= |input| => - //reveal FixMapInnerOnce(); - FixpointMap_( - |input|, - underlying, - fun, - pos - ) - } - opaque function FixMapBuilder(ghost functions: set): (r: FixMapParserBuilder) - ensures r.Valid() - ensures |r.underlying.Keys| == 0 - ensures r.functions == functions - ensures r.size == |input| - { - var underlying: map> := map[]; - FixMapParserBuilder(|input|, functions, underlying) - } - - const EOS: Parser<()> := - (pos: nat) => if pos >= |input| then PSuccess(pos, ()) else PFailure(Fatal, "Expected end of string", pos) - - opaque function FirstOf(others: seq>): (p: ParserBuilder) - requires |others| > 0 - requires forall other <- others :: other.size == |input| - ensures - forall pos: nat | - forall pp <- others :: - pp.apply.requires(pos) && (pp.apply(pos).PSuccess? ==> pos <= pp.apply(pos).pos <= |input|) - :: - p.apply.requires(pos) && (p.apply(pos).PSuccess? ==> pos <= p.apply(pos).pos <= |input|) - { - FirstOf_(|input|, others) - } - - function R(result: R): (p: ParserBuilder) - { - B(Succeed(result)) - } - - const natToDigit: seq := "0123456789" - const digitToNat: map := map i | 0 <= i < |natToDigit| :: natToDigit[i] := i - - opaque function Digit(test: bool := true): (p: Parser) - ensures forall pos:nat | pos <= |input| :: - && p.requires(pos) - && (p(pos).PSuccess? ==> - && pos < |input| - && input[pos] in digitToNat - && digitToNat[input[pos]] == p(pos).result - && 0 <= p(pos).result <= 9 - && p(pos).pos == pos + 1) - { - (pos: nat) requires pos <= |input| => - if pos == |input| then PFailure(if test then Recoverable else Fatal, "Expected a digit", pos) else - if input[pos] in digitToNat then - PSuccess(pos + 1, digitToNat[input[pos]]) - else PFailure(if test then Recoverable else Fatal, "Expected a digit", pos) - } - - ghost predicate RecSpec(fun: string, otherFuns: set, rec: string --> Parser, pos: nat) - { - FixMapSpecInner(fun, otherFuns, |input|, rec, pos) - } - - ghost predicate RecSpecOnce(fun: string, otherFuns: set, mapper: ParserMapper) { - FixMapInnerOnce(fun, mapper, otherFuns, |input|) - } - - // TODO: We have the ability to define another parser given the result of the first one, - // but I'm missing the ability to call another parser builder with the result of the first one - // to avoid callbacks. - - opaque function {:vcs_split_on_every_assert} Nat(test: bool := true): (p: Parser) - ensures forall pos: nat | pos <= |input| :: - && p.requires(pos) - && (p(pos).PSuccess? ==> pos < p(pos).pos <= |input|) - { - var d? := Digit(); - Bind(d?, (firstdigit: DigitNat, pos: nat) => - RepAcc(d?, firstdigit, - (previous, next) => previous*10 + next) - ) - } - - opaque function {:vcs_split_on_every_assert} N(test: bool := true): (p: ParserBuilder) - ensures p.size == |input| && - forall pos: nat | pos <= |input| :: - && p.apply.requires(pos) - && (p.apply(pos).PSuccess? ==> pos < p.apply(pos).pos <= |input|) - { - B(Nat(test)) - } - - opaque function Spaces?(): (r: Parser) - ensures forall pos: nat | pos <= |input| :: - r.requires(pos) - && (r(pos).PSuccess? ==> pos <= r(pos).pos <= |input|) - { - (pos: nat) requires pos <= |input| => - if pos < |input| && input[pos] in " \n\r\result" then - PSuccess(pos+1, input[pos..pos+1]) - else - PFailure(Recoverable, "Spaces", pos) - } - - opaque function SkipSpaces(p: Parser): (r: Parser) - requires forall pos: nat | pos <= |input| :: - p.requires(pos) - && (p(pos).PSuccess? ==> pos <= p(pos).pos <= |input|) - ensures forall pos: nat | pos <= |input| :: - r.requires(pos) - && (r(pos).PSuccess? ==> pos <= r(pos).pos <= |input|) - { - ConcatR(Spaces?(), p) - } - function LineContainingPos(pos: nat, p: nat := 0, lineNumber: nat := 0, charNumber: nat := 0, startLinePos: nat := 0): (result: (string, nat, nat)) - decreases |input| - p - ensures 0 <= result.2 <= |input| - requires 0 <= charNumber <= p - requires startLinePos <= p <= |input| - { - if p >= |input| then - assert charNumber <= |input|; - (input[startLinePos..p], lineNumber, charNumber) - else - if input[p] == '\n' || p == |input| then - if pos <= p then - (input[startLinePos..p], lineNumber, charNumber) - else - LineContainingPos(pos, p + 1, lineNumber + 1, 0, p + 1) - else - LineContainingPos(pos, p + 1, lineNumber, if p <= pos then charNumber + 1 else charNumber, startLinePos) - } - - ghost predicate IsRegular(p: Parser) { - IsRegular_(p, |input|) - } - - /*opaque function Regex(s: string): (r: Parser) - ensures forall pos: nat | pos <= |input| :: - r.requires(pos) - && (r(pos).PSuccess? ==> pos <= r(pos).pos <= |input|) - { - if s == "" then Epsilon - else - } by method { - - }*/ - - function FeedbackToString(result: ParseResult): string - requires result.PFailure? - { - var (line, lineNumber, charNumber) := LineContainingPos(result.pos); - result.message + " at position "+StringNat.natToString(result.pos)+" line "+StringNat.natToString(lineNumber)+", column "+StringNat.natToString(charNumber)+":\n>"+ - line+"\n"+seq(charNumber, i => ' ')+"^\n" - } - method ReportError(p: ParseResult) - requires p.PFailure? - { - var (line, lineNumber, charNumber) := LineContainingPos(p.pos); - print "Parse error at position ",p.pos," line ",lineNumber,", column ",charNumber,":\n>", - line, "\n", seq(charNumber, i => ' '), "^\n", - p.message, "\n"; - return; - } - function Debug(message: string, x: R): R { - x - } by method { - print message, ":", x, "\n"; - return x; - } - - function DebugParserFail(msg: string): Parser { - (p: nat) => ParseResult.PFailure(Recoverable, "", Debug( - if p < |input| then msg + "'"+( - if input[p] == '\r' then "\\r" else if input[p] == '\n' then "\\n" else input[p..p+1]) - + "' " + FeedbackToString(PFailure(Recoverable, "", p)) + "\n" else - msg, p)) - } - - opaque function DebugParser(msg: string, other: Parser): (p: Parser) - ensures IsRegular(other) ==> IsRegular(p) - { - var debugParser := DebugParserFail(msg+" (before)"); - var otherParserDebugged := (p: nat) requires other.requires(p) => Debug(msg+" (after)", other(p)); - Or(debugParser, otherParserDebugged) - } - } - class EngineTest extends Engine { - constructor() { - this.input := ""; - } - } - type DigitNat = d: nat | 0 <= d <= 9 - */ -} - -abstract module ParserTests refines Parsers { - lemma AboutSucceed(result: R, input: seq) - ensures - var p := Succeed(result); - && p(input).PSuccess? - && p(input).remaining == input - { reveal Succeed(); } - - lemma AboutFail_(message: string, level: FailureLevel, input: seq) - ensures - var p := Fail(message, level)(input); - && p.PFailure? - && p.data == FailureData(message, input, Option.None) - && p.level == level - { - reveal Fail(); - } - - lemma AboutFail_2(message: string, input: seq) - ensures - var p := Fail(message)(input); - && p.PFailure? - && p.level == Recoverable - && p.data == FailureData(message, input, Option.None) - { - reveal Fail(); - } - - lemma AboutBind_( - left: Parser, - right: (L, seq) -> Parser, - input: seq - ) - ensures - var p := BindSucceeds(left, right)(input); - && var leftResult := left(input); - && !leftResult.IsFailure() - ==> var leftValues := left(input).Extract(); - && var rightResult := right(leftValues.0, leftValues.1)(leftValues.1); - && !rightResult.IsFailure() - ==> && !p.IsFailure() - && p.remaining == rightResult.remaining - && p.result == rightResult.result - { - reveal BindSucceeds(); - } - - lemma AboutMap_(underlying: Parser, mappingFunc: R -> U, input: seq) - ensures var p := Map(underlying, mappingFunc); - && (underlying(input).PSuccess? <==> p(input).PSuccess?) - && (p(input).PSuccess? ==> - && p(input).remaining == underlying(input).remaining - && p(input).result == mappingFunc(underlying(input).result)) - { - reveal Map(); - reveal BindSucceeds(); - reveal Succeed(); - } - - function BindMapCallback(mappingFunc: R -> U): - (R, seq) -> Parser - { - (result: R, remaining: seq) => Succeed(mappingFunc(result)) - } - - lemma AboutMap_Bind_(underlying: Parser, mappingFunc: R -> U, input: seq) - ensures Map(underlying, mappingFunc)(input) - == BindSucceeds(underlying, BindMapCallback(mappingFunc))(input) - { - reveal Map(); - reveal BindSucceeds(); - reveal Succeed(); - } - - lemma AboutConcat( - left: Parser, - right: Parser, - input: seq) - ensures var p := Concat(left, right); - && (p(input).PSuccess? ==> - && left(input).PSuccess? - && p(input).result.0 == left(input).result - && var input2 := left(input).remaining; - && right(input2).PSuccess? - && p(input).result.1 == right(input2).result - && p(input).remaining == right(input2).remaining) - { - reveal Concat(); - } - - function BindConcatCallback(right: Parser): (L, seq) -> Parser<(L, R)> - { - (l: L, remaining: seq) => - Map(right, (r: R) => (l, r)) - } - - lemma AboutConcatBind_( - left: Parser, - right: Parser, - input: seq) - ensures Concat(left, right)(input) == BindSucceeds(left, BindConcatCallback(right))(input) - { - reveal Concat(); - reveal BindSucceeds(); - reveal Succeed(); - reveal Map(); - } - - lemma AboutConcatR( - left: Parser, - right: Parser, - input: seq) - ensures var p := ConcatR(left, right); - && (p(input).PSuccess? ==> - && left(input).PSuccess? - && var input2 := left(input).remaining; - && right(input2).PSuccess? - && p(input).result == right(input2).result - && p(input).remaining == right(input2).remaining) - { - reveal ConcatR(); - } - - function first(): ((L, R)) -> L { - (lr: (L, R)) => lr.0 - } - function second(): ((L, R)) -> R { - (lr: (L, R)) => lr.1 - } - lemma AboutConcatConcatR( - left: Parser, - right: Parser, - input: seq) - ensures Map(Concat(left, right), second())(input) == ConcatR(left, right)(input) - { - reveal Concat(); - reveal Succeed(); - reveal ConcatR(); - reveal Map(); - } - - - lemma AboutConcatL( - left: Parser, - right: Parser, - input: seq) - ensures var p := ConcatL(left, right); - && (p(input).PSuccess? ==> - && left(input).PSuccess? - && var input2 := left(input).remaining; - && right(input2).PSuccess? - && p(input).result == left(input).result - && p(input).remaining == right(input2).remaining) - { - reveal ConcatL(); - } - lemma AboutConcatConcatL( - left: Parser, - right: Parser, - input: seq) - ensures Map(Concat(left, right), first())(input) == ConcatL(left, right)(input) - { - reveal Concat(); - reveal Succeed(); - reveal ConcatL(); - reveal Map(); - } - - predicate AboutRepeatIncreasesPosIfUnderlyingSucceedsAtLeastOnceEnsures( - underlying: Parser, - acc: seq, - input: seq - ) - { - var result := Repeat_(underlying, acc, input); - && result.PSuccess? - && |acc| <= |result.result| - && (underlying(input).PSuccess? && |underlying(input).remaining| < |input| - ==> - (|acc| < |result.result| && |result.remaining| < |input|)) - } - - predicate AboutFixpoint_Ensures( - underlying: Parser -> Parser, - input: seq) - { - var p := Fixpoint_(underlying, input); - p.PSuccess? ==> IsRemaining(input, p.remaining) - } - - lemma {:vcs_split_on_every_assert} AboutFixpoint_( - underlying: Parser -> Parser, - input: seq) - requires - forall callback: Parser, u: seq - | underlying(callback)(u).PSuccess? - :: IsRemaining(input, underlying(callback)(input).Remaining()) - ensures AboutFixpoint_Ensures(underlying, input) - { - reveal Fixpoint_(); - } - - - predicate AboutFixpointMap_Ensures( - underlying: map>, - fun: string, - input: seq - ) { - var p := FixpointMap_(underlying, fun, input); - && (p.PSuccess? ==> IsRemaining(input, p.remaining)) - } - -} - -module StringParsers refines ParserTests { - type C = char - - opaque function CharTest(test: char -> bool, name: string): (p: Parser) - { - (input: string) => - if 0 < |input| && test(input[0]) then PSuccess(input[0], input[1..]) - else PFailure(Recoverable, - FailureData("expected a "+name - , input, Option.None)) - } - - opaque function Char(expectedChar: char): (p: Parser) - { - CharTest((c: char) => c == expectedChar, [expectedChar]) - } - - opaque function Digit(): (p: Parser) - { - CharTest(c => c in "0123456789", "digit") - } - - opaque function DigitNumber(): (p: Parser) - { - Map(Digit(), (c: char) => - var n: nat := (if StringNat.IsStringNat([c]) then // Should always be true - StringNat.stringToNat([c]) - else 0); n - ) - } - - opaque function Nat(): (p: Parser) - { - Bind(DigitNumber(), - (result: nat) => - RepeatAcc(DigitNumber(), - (previous: nat, c: nat) => - var r: nat := previous * 10 + c; r, - result - ) - ) - } - - opaque function Int(): (p: Parser) - { - Bind(Maybe(Char('-')), - (minusSign: Option) => - Map(Nat(), (result: nat) => if minusSign.Some? then 0-result else result)) - } - - opaque function String(expected: string): (p: Parser) - { - (input: string) => - if |expected| <= |input| && input[0..|expected|] == expected then PSuccess(expected, input[|expected|..]) - else PFailure(Recoverable, FailureData("expected '"+expected+"'", input, Option.None)) - } - - function repeat(str: string, n: nat): (r: string) - ensures |r| == |str| * n - { - if n == 0 then "" - else str + repeat(str, n-1) - } - - // TODO: Mention the error level, the line number, the column number - // TODO: Extract only the line of interest - method PrintFailure(input: string, result: ParseResult, printPos: int := -1) - requires result.PFailure? - decreases result.data - { - if printPos == -1 { - print "Error:\n"; - } - var pos: int := |input| - |result.data.remaining|; // Need the parser to be Valid() - if pos < 0 { // Could be proved false if parser is Valid() - pos := 0; - } - if printPos != pos { - print input, "\n"; - } - if printPos != pos { - print repeat(" ", pos), "^","\n"; - } - print result.data.message; - if result.data.next.Some? { - print ", or\n"; - PrintFailure(input, PFailure(result.level, result.data.next.value), pos); - } else { - print "\n"; - } - } - -} - -// From these parsers, we can create displayers -// and prove the roundtrip displayer / parser if we wanted to -abstract module ParsersDiplayers { - import Parsers - - type Parser = Parsers.Parser - type C = Parsers.C - - type Displayer<-R> = (R, seq) -> seq - - function Concat( - left: Displayer, - right: Displayer - ): Displayer<(A, B)> { - (ab: (A, B), remaining: seq) => - var remaining2 := right(ab.1, remaining); - var remaining3 := left(ab.0, remaining2); - remaining3 - } - - ghost predicate Roundtrip(parse: Parser, display: Displayer) - // The parser and the displayer are dual to each other - // means that if we parse after printing, we get the same result - { - forall a: A, remaining: seq :: - parse(display(a, remaining)) == Parsers.PSuccess(a, remaining) - } - - lemma {:rlimit 200} ConcatRoundtrip( - pA: Parser, ppA: Displayer, - pB: Parser, ppB: Displayer - ) - requires Roundtrip(pA, ppA) && Roundtrip(pB, ppB) - ensures Roundtrip(Parsers.Concat(pA, pB), Concat(ppA, ppB)) - { - reveal Parsers.Concat(); - var p := Parsers.Concat(pA, pB); - var d := Concat(ppA, ppB); - forall ab: (A, B), remaining: seq ensures - p(d(ab, remaining)) == Parsers.PSuccess(ab, remaining) - { - } - } -} \ No newline at end of file diff --git a/src/Parsers/parserTests.dfy b/src/Parsers/parserTests.dfy new file mode 100644 index 00000000..70f18e42 --- /dev/null +++ b/src/Parsers/parserTests.dfy @@ -0,0 +1,413 @@ +include "parsers.dfy" +//include "../NonLinearArithmetic/DivMod.dfy" + +abstract module ParserTests refines Parsers { + //import DivMod + + lemma AboutSucceed(result: R, input: seq) + ensures + var p := Succeed(result); + && p(input).Success? + && p(input).remaining == input + { reveal Succeed(); } + + lemma AboutFail_(message: string, level: FailureLevel, input: seq) + ensures + var p := Fail(message, level)(input); + && p.Failure? + && p.data == FailureData(message, input, Option.None) + && p.level == level + { + reveal Fail(); + } + + lemma AboutFail_2(message: string, input: seq) + ensures + var p := Fail(message)(input); + && p.Failure? + && p.level == Recoverable + && p.data == FailureData(message, input, Option.None) + { + reveal Fail(); + } + + lemma AboutBind_( + left: Parser, + right: (L, seq) -> Parser, + input: seq + ) + ensures + var p := BindSucceeds(left, right)(input); + && var leftResult := left(input); + && !leftResult.IsFailure() + ==> var leftValues := left(input).Extract(); + && var rightResult := right(leftValues.0, leftValues.1)(leftValues.1); + && !rightResult.IsFailure() + ==> && !p.IsFailure() + && p.remaining == rightResult.remaining + && p.result == rightResult.result + { + reveal BindSucceeds(); + } + + lemma AboutMap_(underlying: Parser, mappingFunc: R -> U, input: seq) + ensures var p := Map(underlying, mappingFunc); + && (underlying(input).Success? <==> p(input).Success?) + && (p(input).Success? ==> + && p(input).remaining == underlying(input).remaining + && p(input).result == mappingFunc(underlying(input).result)) + { + reveal Map(); + reveal BindSucceeds(); + reveal Succeed(); + } + + function BindMapCallback(mappingFunc: R -> U): + (R, seq) -> Parser + { + (result: R, remaining: seq) => Succeed(mappingFunc(result)) + } + + lemma AboutMap_Bind_(underlying: Parser, mappingFunc: R -> U, input: seq) + ensures Map(underlying, mappingFunc)(input) + == BindSucceeds(underlying, BindMapCallback(mappingFunc))(input) + { + reveal Map(); + reveal BindSucceeds(); + reveal Succeed(); + } + + lemma AboutConcat( + left: Parser, + right: Parser, + input: seq) + ensures var p := Concat(left, right); + && (p(input).Success? ==> + && left(input).Success? + && p(input).result.0 == left(input).result + && var input2 := left(input).remaining; + && right(input2).Success? + && p(input).result.1 == right(input2).result + && p(input).remaining == right(input2).remaining) + { + reveal Concat(); + reveal ConcatMap(); + } + + function BindConcatCallback(right: Parser): (L, seq) -> Parser<(L, R)> + { + (l: L, remaining: seq) => + Map(right, (r: R) => (l, r)) + } + + lemma AboutConcatBind_( + left: Parser, + right: Parser, + input: seq) + ensures Concat(left, right)(input) == BindSucceeds(left, BindConcatCallback(right))(input) + { + reveal Concat(); + reveal BindSucceeds(); + reveal Succeed(); + reveal Map(); + reveal ConcatMap(); + } + + lemma AboutConcatR( + left: Parser, + right: Parser, + input: seq) + ensures var p := ConcatR(left, right); + && (p(input).Success? ==> + && left(input).Success? + && var input2 := left(input).remaining; + && right(input2).Success? + && p(input).result == right(input2).result + && p(input).remaining == right(input2).remaining) + { + reveal ConcatR(); + reveal ConcatMap(); + } + + function first(): ((L, R)) -> L { + (lr: (L, R)) => lr.0 + } + function second(): ((L, R)) -> R { + (lr: (L, R)) => lr.1 + } + lemma AboutConcatConcatR( + left: Parser, + right: Parser, + input: seq) + ensures Map(Concat(left, right), second())(input) == ConcatR(left, right)(input) + { + reveal Concat(); + reveal Succeed(); + reveal ConcatR(); + reveal Map(); + reveal ConcatMap(); + } + + + lemma AboutConcatL( + left: Parser, + right: Parser, + input: seq) + ensures var p := ConcatL(left, right); + && (p(input).Success? ==> + && left(input).Success? + && var input2 := left(input).remaining; + && right(input2).Success? + && p(input).result == left(input).result + && p(input).remaining == right(input2).remaining) + { + reveal ConcatL(); + reveal ConcatMap(); + } + lemma AboutConcatConcatL( + left: Parser, + right: Parser, + input: seq) + ensures Map(Concat(left, right), first())(input) == ConcatL(left, right)(input) + { + reveal Concat(); + reveal Succeed(); + reveal ConcatL(); + reveal Map(); + reveal ConcatMap(); + } + + predicate AboutRepIncreasesPosIfUnderlyingSucceedsAtLeastOnceEnsures( + underlying: Parser, + acc: seq, + input: seq + ) + { + var result := RepSeq(underlying)(input); + && result.Success? + && |acc| <= |result.result| + && (underlying(input).Success? && |underlying(input).remaining| < |input| + ==> + (|acc| < |result.result| && |result.remaining| < |input|)) + } + + predicate AboutFix_Ensures( + underlying: Parser -> Parser, + input: seq) + { + var p := Recursive_(underlying, input); + p.Success? ==> IsRemaining(input, p.remaining) + } + + lemma {:vcs_split_on_every_assert} AboutFix_( + underlying: Parser -> Parser, + input: seq) + requires + forall callback: Parser, u: seq + | underlying(callback)(u).Success? + :: IsRemaining(input, underlying(callback)(input).Remaining()) + ensures AboutFix_Ensures(underlying, input) + { + reveal Recursive_(); + } + + + predicate AboutRecursiveMap_Ensures( + underlying: map>, + fun: string, + input: seq + ) { + var p := RecursiveMap_(underlying, fun, input); + && (p.Success? ==> IsRemaining(input, p.remaining)) + } + + + lemma Succeed_NonCrashing(result: R) + ensures Valid(Succeed(result)) + { reveal Valid(), Succeed(); } + + lemma Succeed_NonCrashingAuto() + ensures forall result: R :: Valid(Succeed(result)) + { reveal Valid(), Succeed(); } + + lemma Epsilon_NonCrashing() + ensures Valid(Epsilon()) + { reveal Valid(), Epsilon(); Succeed_NonCrashing(()); } + + lemma AboutEpsilon_(input: seq) + ensures + var p := Epsilon(); + && p(input).Success? + && p(input).remaining == input + { + reveal Epsilon(); + reveal Succeed(); + } + + lemma Fail_NonCrashing(message: string) + ensures Valid(Fail(message, Recoverable)) + { reveal Fail(); reveal Valid(); } + + lemma Fail_NonCrashingAuto() + ensures forall message :: Valid(Fail(message, Recoverable)) + { reveal Fail(); reveal Valid(); } + + ghost predicate BindRightNonCrashing(right: (L, seq) -> Parser) { + forall l: L, input: seq :: Valid(right(l, input)) + } + + lemma Bind_NonCrashing( + left: Parser, + right: (L, seq) -> Parser + ) + requires Valid(left) + requires BindRightNonCrashing(right) + ensures Valid(BindSucceeds(left, right)) + { + reveal BindSucceeds(), Valid(); + var p := BindSucceeds(left, right); + forall input: seq ensures + && (p(input).Failure? ==> p(input).level == Recoverable) + && IsRemaining(input, p(input).Remaining()) + { + + } + } + + ghost predicate Bind_NonCrashingRight(left: Parser) + requires Valid(left) + { + forall right: (L, seq) -> Parser | BindRightNonCrashing(right) :: + Valid(BindSucceeds(left, right)) + } + + lemma Bind_NonCrashingAuto() + ensures forall left: Parser | Valid(left) :: + Bind_NonCrashingRight(left) + { + forall left: Parser | Valid(left), + right: (L, seq) -> Parser | BindRightNonCrashing(right) + ensures + Valid(BindSucceeds(left, right)) + { + Bind_NonCrashing(left, right); + } + } + + lemma intToStringThenStringToIntIdem(n: int) + decreases if n < 0 then 1 - n else n + ensures 0 <= n ==> 1 <= |intToString(n)| && intToString(n)[0] != '-' + ensures stringToInt(intToString(n)) == n + { // Proof is automatic + reveal intToString(), stringToInt(), digitToInt(); + if n < 0 { + calc { + stringToInt(intToString(n)); + stringToInt("-" + intToString(-n)); + 0 - stringToInt(intToString(-n)); + { intToStringThenStringToIntIdem(-n); } + n; + } + } else if 0 <= n <= 9 { + assert stringToInt(intToString(n)) == n; + } else { + assert intToString(n) == intToString(n / 10) + intToString(n % 10); + var s := intToString(n); + } + } + opaque predicate IsStringInt(s: string): (b: bool) + ensures b ==> |s| > 0 + { + |s| > 0 && + if s[0] == '-' then + |s| > 1 && s[1] != '0' && + (forall i | 1 <= i < |s| :: s[i] in "0123456789") + else + (|s| > 1 ==> s[0] != '0') && + (forall i | 0 <= i < |s| :: s[i] in "0123456789") + } + + lemma stringToIntNonnegative(s: string) + requires IsStringInt(s) + requires s[0] != '-' + decreases |s| + ensures 0 <= stringToInt(s) + ensures s != "0" ==> 0 < stringToInt(s) + ensures |s| > 1 ==> 10 <= stringToInt(s) + { + if |s| == 0 { + + } else if |s| == 1 { + reveal digitToInt(), stringToInt(), IsStringInt(); + match s[0] + case '0' => case '1' => case '2' => case '3' => case '4' => + case '5' => case '6' => case '7' => case '8' => case '9' => + case _ => + } else if s[0] == '-' { + } else { + assert !(|s| == 0 || |s| == 1 || s[0] == '-'); + reveal stringToInt(); + assert stringToInt(s) == stringToInt(s[0..|s|-1])*10 + stringToInt(s[|s|-1..|s|]); + assert IsStringInt(s[0..|s|-1]) by { + reveal IsStringInt(); + } + stringToIntNonnegative(s[..|s|-1]); + var tail := s[|s|-1..|s|]; + assert IsStringInt(tail) && tail[0] != '-' by { + reveal IsStringInt(); + } + stringToIntNonnegative(tail); + reveal IsStringInt(); + assert |s| > 1 ==> 10 <= stringToInt(s); + } + } + + lemma stringToIntThenIntToStringIdem(s: string) + requires IsStringInt(s) + decreases |s| + ensures s[0] != '-' ==> 0 <= stringToInt(s) + ensures |s| == 1 ==> 0 <= stringToInt(s) <= 9 + ensures intToString(stringToInt(s)) == s + { + assert |s| > 0; + if 1 <= |s| && s[0] == '-' { + reveal intToString(), stringToInt(), IsStringInt(); + assert forall i | 1 <= i < |s| :: s[i] in "0123456789"; + calc { + intToString(stringToInt(s)); + intToString(0 - stringToInt(s[1..])); + } + } else if |s| == 1 { + reveal intToString(), stringToInt(), IsStringInt(), digitToInt(); + calc { + intToString(stringToInt(s)); + s; + } + } else { + var n := stringToInt(s); + stringToIntNonnegative(s); + var init := s[..|s|-1]; + var last := s[|s|-1..|s|]; + var q := stringToInt(init); + var r := stringToInt(last); + assert IsStringInt(init) by { reveal IsStringInt(); } + assert IsStringInt(last) by { reveal IsStringInt(); } + stringToIntThenIntToStringIdem(init); + stringToIntThenIntToStringIdem(last); + assert stringToInt(s) == + stringToInt(s[0..|s|-1])*10 + stringToInt(s[|s|-1..|s|]) by { + reveal stringToInt(); + } + assert n == q * 10 + r; + calc { + intToString(n); + { reveal intToString(); + assert !(n < 0); + assert n != 0; + } + intToString(n / 10) + intToString(n % 10); + s; + } + } + } +} diff --git a/src/Parsers/parsers.dfy b/src/Parsers/parsers.dfy new file mode 100644 index 00000000..693ef248 --- /dev/null +++ b/src/Parsers/parsers.dfy @@ -0,0 +1,550 @@ +include "../Wrappers.dfy" +include "library.dfy" + +abstract module Parsers +// Functional parsers consuming sequences seq from the left to the right. +// For parsers over strings, please refer to the StringParsers module +{ + import Wrappers + + type C(!new, ==) + // The character of the sequence being parsed + + type Parser<+R> = seq -> ParseResult + // A parser is a total function from a position to a parse result + // Because it returns a delta pos, it cannot return a position negative from the origing + // If the parsing is out of context, it will return a failure. + + type ParserSelector = string -> Parser + // A parser selector is a function that, given a name that exists, + // returns a parser associated to this name + + type Option = Wrappers.Option + // The common option type, synonym definition + + datatype FailureData = + FailureData( + message: string, + remaining: seq, + next: Option) + // A Parser failure can mention several places + // (e.g. which could have continued to parse) + { + function Concat(other: FailureData): FailureData + // Concatenates two failure datas, the first staying in the front + { + if next == Option.None then + this.(next := Option.Some(other)) + else + FailureData(message, remaining, Option.Some(next.value.Concat(other))) + } + } + + datatype FailureLevel = + // Failure level for parse results. + // A Fatal error results in a unique FailurePosition + // and will be propagated to the top ASAP + // A Recoverable error can typically be processed. + // Comittedness of the parser only depends if the .Remaining() + // of the parse result has moved since the input was provided. + Fatal | Recoverable + + datatype ParseResult<+R> = + // ParseResult is the type of what a parser taking a seq would return + | Failure(level: FailureLevel, data: FailureData) + // Returned if a parser failed. + | Success(result: R, remaining: seq) + // Returned if a parser succeeds, with the increment in the position + { + function Remaining(): seq + // If Remaining() is the same as the input, the parser is "uncommitted", + // which means combinators like Or and RepSeq can try alternatives + { + if Success? then remaining else data.remaining + } + + predicate IsFailure() { + Failure? + } + + predicate IsFatalFailure() { + Failure? && level == Fatal + } + + predicate IsFatal() + requires IsFailure() + { + level == Fatal + } + + function PropagateFailure(): ParseResult + requires IsFailure() + { + Failure(level, data) + } + + function Extract(): (R, seq) + requires !IsFailure() + { + (result, remaining) + } + + function Map(f: R -> R'): ParseResult + // Transforms the result of a successful parse result + { + match this + case Success(result, remaining) => + Success(f(result), remaining) + case Failure(level, data) => + Failure(level, data) + } + + function MapRecoverableError( + f: FailureData -> FailureData + ): ParseResult + // If the result is a recoverable error, + // let the function process it + { + match this + case Failure(Recoverable, data) => + Failure(Recoverable, f(data)) + case _ => this + } + + predicate NeedsAlternative(input: seq) + // Returns true if the parser result is a + // - A failure + // - Is recoverable + // - Did not consume any input (not-committed) + { + Failure? && level == Recoverable && input == Remaining() + } + } + + predicate IsRemaining(input: seq, remaining: seq) + // True if remaining is a suffix of the input + { + && |remaining| <= |input| + && input[|input|-|remaining|..] == remaining + } + + opaque ghost predicate Valid(underlying: Parser) + // A parser is valid iff for any input, it never returns a fatal error + // and always returns a suffix of its input + { + forall input: seq :: + && (underlying(input).Failure? ==> underlying(input).level == Recoverable) + && IsRemaining(input, underlying(input).Remaining()) + } + + // ######################################## + // Parser combinators. + // The following functions make it possible to create and compose parsers + // All these combinators provide Valid() parsers if their inputs are Valid() too + // ######################################## + + opaque function Succeed(result: R): (p: Parser) + // A parser that does not consume any input and returns the given value + { + (input: seq) => Success(result, input) + } + + opaque function Epsilon(): (p: Parser<()>) + // A parser that always succeeds, consumes nothing and returns () + { + Succeed(()) + } + + opaque function Fail(message: string, level: FailureLevel := Recoverable): Parser + // A parser that does not consume any input and returns the given failure + { + (input: seq) => Failure(level, FailureData(message, input, Option.None)) + } + + opaque function EndOfString(): Parser<()> + // A parser that fails if the string has not been entirely consumed + { + (input: seq) => + if |input| == 0 then Success((), input) + else Failure(Recoverable, FailureData("expected end of string", input, Option.None)) + } + + opaque function Bind( + left: Parser, + right: L -> Parser + ) : (p: Parser) + // Fails if the left parser fails. + // If the left parser succeeds, provides its result and the remaining sequence + // to the right parser generator. + // For a more general version, look at BindSucceeds + { + (input: seq) + => + var (leftResult, remaining) :- left(input); + right(leftResult)(remaining) + } + + opaque function BindSucceeds( + left: Parser, + right: (L, seq) -> Parser + ) : (p: Parser) + // Fails if the left parser fails. + // If the left parser succeeds, provides its result and its remaining + // to the right parser generator and returns its result applied to the remaining + // For a more general version, look at BindResult + { + (input: seq) + => + var (leftResult, remaining) :- left(input); + right(leftResult, remaining)(remaining) + } + + opaque function BindResult( + left: Parser, + right: (ParseResult, seq) -> Parser + ) : (p: Parser) + // Given a left parser and a parser generator based on the output + // of the left parser, + // returns the result of the right parser applied on the original input + { + (input: seq) + => + right(left(input), input)(input) + } + + opaque function Map(underlying: Parser, mappingFunc: R -> U) + : (p: Parser) + // A parser combinator that makes it possible to transform the result of a parser in another one + // The mapping function can be partial + // ensures forall pos | MapSpec(size, underlying, mappingFunc, pos) :: + // p.requires(pos) + { + (input: seq) => + var (result, remaining) :- underlying(input); + var u := mappingFunc(result); + Success(u, remaining) + } + + opaque function Not(underlying: Parser): Parser<()> + // Returns a parser that succeeds if the underlying parser fails + // and vice-versa. The result does not consume any input + { + (input: seq) => + var l := underlying(input); + if l.IsFailure() then + if l.IsFatal() then l.PropagateFailure() + else Success((), input) + else Failure(Recoverable, FailureData("not failed", input, Option.None)) + } + + opaque function And( + left: Parser, + right: Parser + ) : (p: Parser<(L, R)>) + // Make the two parsers parse the same string and, if both suceed, + // returns a pair of the two results, with the remaining of the right + { + (input: seq) => + var (l, remainingLeft) :- left(input); + var (r, remainingRight) :- right(input); + Success((l, r), remainingRight) + } + + opaque function Or( + left: Parser, + right: Parser + ) : (p: Parser) + // left parses the string. If left succeeds, returns + // if left fails, two cases + // - If the error is recoverable and the parser did not consume input, + // then return what right returns + // - Otherwise return both errors + { + (input: seq) => + var p := left(input); + if !p.NeedsAlternative(input) then p else + var p2 := right(input); + if !p2.NeedsAlternative(input) then p2 else + p2.MapRecoverableError( + dataRight => + p.data.Concat(dataRight)) + } + + opaque function OrSeq( + alternatives: seq> + ): Parser + { + if |alternatives| == 0 then Fail("no alternatives") else + if |alternatives| == 1 then alternatives[0] + else + Or(alternatives[0], OrSeq(alternatives[1..])) + } + + opaque function Lookahead(underlying: Parser): (p: Parser) + // If the underlying parser succeeds, + // returns its result without committing the input + // if the underlying parser fails, + // - If the failure is fatal, returns it as-it + // - If the failure is recoverable, returns it without comitting the input + { + (input: seq) => + var p := underlying(input); + if p.IsFailure() then + if p.IsFatal() then + p + else + p.(data := FailureData(p.data.message, input, Option.None)) + else + p.(remaining := input) + } + + opaque function ?(underlying: Parser): (p: Parser) + // Like Lookahead, except that if the parser succeeds, + // it keeps the committedness of the input. + // Identical to Lookahead, if the underlying parser fails, + // - If the failure is fatal, returns it as-it + // - If the failure is recoverable, returns it without comitting the input + { + (input: seq) => + var p := underlying(input); + if p.IsFailure() then + if p.IsFatal() then + p + else + p.(data := FailureData(p.data.message, input, Option.None)) + else + p + } + + opaque function If( + condition: Parser, + succeed: Parser + ) : (p: Parser) + // If the condifition fails, returns a non-committing failure + // Suitable to use in Or parsers + { + Bind(Lookahead(condition), (l: L) => succeed) + } + + opaque function Maybe(underlying: Parser): Parser> + // Transforms a recoverable failure into None, + // and wraps a success into Some(...) + { + (input: seq) => + var u := underlying(input); + if u.IsFatalFailure() then u.PropagateFailure() + else + if u.Success? then u.Map(result => Option.Some(result)) + else Success(Option.None, input) + } + + opaque function ConcatMap( + left: Parser, + right: Parser, + mapper: (L, R) -> T + ) : (p: Parser) + // Apply two consecutive parsers consecutively + // If both succeed, apply the mapper to the result and return it + { + (input: seq) + => + var (l, remaining) :- left(input); + var (r, remaining2) :- right(remaining); + Success(mapper(l, r), remaining2) + } + + opaque function Concat( + left: Parser, + right: Parser + ) : (p: Parser<(L, R)>) + // Apply two consecutive parsers consecutively + // If both succeed, return the pair of the two results + { + (input: seq) => + var (l, remaining) :- left(input); + var (r, remaining2) :- right(remaining); + Success((l, r), remaining2) + } + + opaque function ConcatR( + left: Parser, + right: Parser + ) : (p: Parser) + // Return only the result of the right parser if the two parsers match + { + ConcatMap(left, right, (l, r) => r) + } + + opaque function ConcatL( + left: Parser, + right: Parser + ) : (p: Parser) + // Return only the result of the right parser if the two parsers match + { + ConcatMap(left, right, (l, r) => l) + } + + opaque function RepSeq( + underlying: Parser + ): Parser> + // Repeats the underlying parser until the first failure + // that accepts alternatives, and returns the underlying sequence + { + Rep(underlying, (result: seq, r: R) => result + [r], []) + } + + opaque function Rep( + underlying: Parser, + combine: (A, B) -> A, + acc: A + ): Parser + // Repeats the underlying parser until the first failure + // that accepts alternatives, combining results to an accumulator + // and return the final accumulator + { + (input: seq) => Rep_(underlying, combine, acc, input) + } + + opaque function {:tailrecursion true} Rep_( + underlying: Parser, + combine: (A, B) -> A, + acc: A, + input: seq + ): (p: ParseResult) + decreases |input| + // RepSeq the underlying parser over the input until a recoverable failure happens + // and returns the accumulated results + { + match underlying(input) + case Success(result, remaining) => + if |remaining| >= |input| then Success(acc, input) else + Rep_(underlying, combine, combine(acc, result), remaining) + case failure => + if failure.NeedsAlternative(input) then + Success(acc, input) + else + failure.PropagateFailure() + } + + opaque function Recursive( + underlying: Parser -> Parser + ): (p: Parser) + // Given a function that requires a parser to return a parser, + // provide the result of this parser to that function itself. + // Careful: This function is not tail-recursive and will consume stack. + // Prefer using Rep() or RepSeq() for sequences + { + (input: seq) => Recursive_(underlying, input) + } + + opaque function Recursive_( + underlying: Parser -> Parser, + input: seq + ): (p: ParseResult) + // Implementation for Recursive() + decreases |input| + { + var callback: Parser := + (remaining: seq) => + if |remaining| < |input| then + Recursive_(underlying, remaining) + else if |remaining| == |input| then + Failure(Recoverable, FailureData("no progress", remaining, Option.None)) + else + Failure(Fatal, FailureData("fixpoint called with an increasing remaining sequence", remaining, Option.None)); + underlying(callback)(input) + } + + opaque function RecursiveMap( + underlying: map>, + fun: string): (p: Parser) + // Given a map of name := recursive definitions, + // provide the result of this parser to the recursive definitions + // and set 'fun' as the initial parser. + // Careful: This function is not tail-recursive and will consume stack + { + (input: seq) => RecursiveMap_(underlying, fun, input) + } + + datatype RecursiveDef = RecursiveDef( + order: nat, + definition: ParserSelector -> Parser + ) // The order must be decreasing every time the function steps in + // But it can jump to a bigger order if the input is consumed + + opaque function RecursiveMap_( + underlying: map>, + fun: string, + input: seq + ): (p: ParseResult) + // Implementation for RecursiveMap() + decreases |input|, if fun in underlying then underlying[fun].order else 0 + { + if fun !in underlying then Failure(Fatal, FailureData("parser '"+fun+"' not found", input, Option.None)) else + var RecursiveDef(orderFun, definitionFun) := underlying[fun]; + var callback: ParserSelector + := + (fun': string) => + (var p : Parser := + if fun' !in underlying.Keys then + Fail(fun' + " not defined", Fatal) + else + var RecursiveDef(orderFun', definitionFun') := underlying[fun']; + (remaining: seq) => + if |remaining| < |input| || (|remaining| == |input| && orderFun' < orderFun) then + RecursiveMap_(underlying, fun', remaining) + else if |remaining| == |input| then + Failure(Recoverable, FailureData("non-progressing recursive call requires that order of '" + +fun'+"' ("+intToString(orderFun')+") is lower than the order of '"+fun+"' ("+intToString(orderFun)+")", remaining, Option.None)) + else + Failure(Fatal, FailureData("parser did not return a suffix of the input", remaining, Option.None)) + ; p); + definitionFun(callback)(input) + } + + function Debug_(message: string): string { + message + } by method { + print message, "\n"; + return message; + } + + opaque function DebugParser(msg: string, other: Parser): (p: Parser) + // A parser that, when invoked, will print a message before applying its underlying parser + // and also afterwards + { + (input: seq) => + var _ := Debug_(msg + "(before)"); + var p := other(input); + var _ := Debug_(msg + "(after)"); + p + } + + opaque function intToString(n: int): string + decreases if n < 0 then 1 - n else n + { + if n < 0 then "-" + intToString(-n) else + match n + case 0 => "0" case 1 => "1" case 2 => "2" case 3 => "3" case 4 => "4" + case 5 => "5" case 6 => "6" case 7 => "7" case 8 => "8" case 9 => "9" + case _ => intToString(n / 10) + intToString(n % 10) + } + + opaque function digitToInt(c: char): int { + match c + case '0' => 0 case '1' => 1 case '2' => 2 case '3' => 3 case '4' => 4 + case '5' => 5 case '6' => 6 case '7' => 7 case '8' => 8 case '9' => 9 + case _ => 0 + } + + opaque function stringToInt(s: string): int + decreases |s| + { + if |s| == 0 then 0 else + if |s| == 1 then digitToInt(s[0]) + else if s[0] == '-' then + 0 - stringToInt(s[1..]) + else + stringToInt(s[0..|s|-1])*10 + stringToInt(s[|s|-1..|s|]) + } +} \ No newline at end of file diff --git a/src/Parsers/parsersDSL.dfy b/src/Parsers/parsersDSL.dfy new file mode 100644 index 00000000..1de561ae --- /dev/null +++ b/src/Parsers/parsersDSL.dfy @@ -0,0 +1,72 @@ +include "parsers.dfy" + +// Nice wanna-to-be DSL to build parsers to avoid too much parenthesis nesting +// B(p) returns a parser builder from a normal parser. +// B1.o_I(B2) will parse both but return the result of B2 +// B1.I_o(B2) will parse both but return the result of B1 +// B.M(f) will map the result of the parser builder by f if succeeded +// B1.O(B2) will either parse B1, or B2 if B1 fails with Recoverable +// FirstOf([B1, B2, B3]) +// will parse with B1, but if B1 fails with Recoverable, +// it will parse with B2, but if B2 fails with Recoverable, +// it will parse with B3 +// R(v) returns a parser builder that returns immediately v +// +// There are more parser builders in the trait Engine, when their spec depends on +// a predetermined input, e.g. to tests for constant strings + +abstract module ParserBuilders { + import Parsers + + type Parser = Parsers.Parser + type FailureLevel = Parsers.FailureLevel + + // Wrap the constructor in a class where the size is constant so that users + // don'result need to provide it. + datatype B = B(apply: Parser) + { + opaque function e_I(other: B): (p: B) + // Excludes the left, includes the right + { + B(Parsers.ConcatR(apply, other.apply)) + } + opaque function I_o(other: B): (p: B) + // Includes the left, excludes the right + { + B(Parsers.ConcatL(apply, other.apply)) + } + opaque function M(mappingFunc: R -> U): (p: B) + // Maps the result + { + B(Parsers.Map(apply, mappingFunc)) + } + static function BS(result: R): (p: B) + { + B(Parsers.Succeed(result)) + } + + static function BF(message: string, level: FailureLevel := FailureLevel.Recoverable): (p: B) + { + B(Parsers.Fail(message, level)) + } + + static function O(alternatives: seq>): B + // Declares a set of alternatives as a single list + { + if |alternatives| == 0 then BF("no alternative") else + if |alternatives| == 1 then alternatives[0] + else + B(Parsers.Or(alternatives[0].apply, O(alternatives[1..]).apply)) + } + + opaque function Then(other: R -> B): (p: B) + { + B(Parsers.Bind(apply, (result: R) => other(result).apply)) + } + + opaque function Rep(init: R, combine: (R, R) -> R): (p: B) + { + B(Parsers.Rep(apply, combine, init)) + } + } +} diff --git a/src/Parsers/parsersDisplayers.dfy b/src/Parsers/parsersDisplayers.dfy new file mode 100644 index 00000000..caa245fe --- /dev/null +++ b/src/Parsers/parsersDisplayers.dfy @@ -0,0 +1,46 @@ +include "parsers.dfy" + +// From these parsers, we can create displayers +// and prove the roundtrip displayer / parser if we wanted to +abstract module ParsersDiplayers { + import Parsers + + type Parser = Parsers.Parser + type C = Parsers.C + + type Displayer<-R> = (R, seq) -> seq + + function Concat( + left: Displayer, + right: Displayer + ): Displayer<(A, B)> { + (ab: (A, B), remaining: seq) => + var remaining2 := right(ab.1, remaining); + var remaining3 := left(ab.0, remaining2); + remaining3 + } + + ghost predicate Roundtrip(parse: Parser, display: Displayer) + // The parser and the displayer are dual to each other + // means that if we parse after printing, we get the same result + { + forall a: A, remaining: seq :: + parse(display(a, remaining)) == Parsers.Success(a, remaining) + } + + lemma {:rlimit 1000} ConcatRoundtrip( + pA: Parser, ppA: Displayer, + pB: Parser, ppB: Displayer + ) + requires Roundtrip(pA, ppA) && Roundtrip(pB, ppB) + ensures Roundtrip(Parsers.Concat(pA, pB), Concat(ppA, ppB)) + { + reveal Parsers.Concat(); + var p := Parsers.Concat(pA, pB); + var d := Concat(ppA, ppB); + forall ab: (A, B), remaining: seq ensures + p(d(ab, remaining)) == Parsers.Success(ab, remaining) + { + } + } +} \ No newline at end of file diff --git a/src/Parsers/stringParsers.dfy b/src/Parsers/stringParsers.dfy new file mode 100644 index 00000000..baf85187 --- /dev/null +++ b/src/Parsers/stringParsers.dfy @@ -0,0 +1,142 @@ +include "parsers.dfy" + +module StringParsers refines Parsers { + type C = char + + // ################################## + // String-specific parser combinators + // ################################## + + opaque function CharTest(test: char -> bool, name: string): (p: Parser) + // A parser that returns the current char if it passes the test + // Returns a recoverable error based on the name otherwise + { + (input: string) => + if 0 < |input| && test(input[0]) then Success(input[0], input[1..]) + else Failure(Recoverable, + FailureData("expected a "+name, input, Option.None)) + } + + opaque function Char(expectedChar: char): (p: Parser) + // A parser that tests if the current char is the given expected char + { + CharTest((c: char) => c == expectedChar, [expectedChar]) + } + + opaque function Digit(): (p: Parser) + // A parser that tests if the current char is a digit and returns it + { + CharTest(c => c in "0123456789", "digit") + } + + opaque function DigitNumber(): (p: Parser) + // A parser that returns the current char as a number if it is one + { + Map(Digit(), (c: char) => + var n: nat := (if StringNat.IsStringNat([c]) then // Should always be true + StringNat.stringToNat([c]) + else 0); n + ) + } + + opaque function Nat(): (p: Parser) + // A parser that parses a natural number + { + Bind(DigitNumber(), + (result: nat) => + Rep(DigitNumber(), + (previous: nat, c: nat) => + var r: nat := previous * 10 + c; r, + result + ) + ) + } + + opaque function Int(): (p: Parser) + // A parser that parses a integer, possibly negative + { + Bind(Maybe(Char('-')), + (minusSign: Option) => + Map(Nat(), (result: nat) => if minusSign.Some? then 0-result else result)) + } + + opaque function String(expected: string): (p: Parser) + // A parser that succeeds only if the input starts with the given string + { + (input: string) => + if |expected| <= |input| && input[0..|expected|] == expected then Success(expected, input[|expected|..]) + else Failure(Recoverable, FailureData("expected '"+expected+"'", input, Option.None)) + } + + // ######################## + // Error handling utilities + // ######################## + + function repeat_(str: string, n: nat): (r: string) + // Repeats the given string n times + ensures |r| == |str| * n + { + if n == 0 then "" + else str + repeat_(str, n-1) + } + + method ExtractLineCol(input: string, pos: nat) + returns (lineNumber: nat, lineStr: string, colNumber: nat) + // Returns the line number, the extracted line, and the column number + // corresponding to a given position in the given input + { + lineNumber := 1; + var startLinePos: nat := 0; + colNumber := 0; + var i := 0; + while i < |input| && i != pos + invariant 0 <= startLinePos <= i <= |input| + { + colNumber := colNumber + 1; + if input[i] == '\r' && i + 1 < |input| && input[i+1] == '\n' { + lineNumber := lineNumber + 1; + colNumber := 0; + i := i + 1; + startLinePos := i + 1; + } else if input[i] in "\r\n" { + lineNumber := lineNumber + 1; + colNumber := 0; + startLinePos := i + 1; + } + i := i + 1; + } + while i < |input| && input[i] !in "\r\n" + invariant startLinePos <= i <= |input| + { + i := i + 1; + } + lineStr := input[startLinePos..i]; + } + + method PrintFailure(input: string, result: ParseResult, printPos: int := -1) + // Util to print the line, the column, and all the error messages + // associated to a given parse failure + requires result.Failure? + decreases result.data + { + if printPos == -1 { + print if result.level == Fatal then "Fatal error" else "Error", ":\n"; + } + var pos: int := |input| - |result.data.remaining|; // Need the parser to be Valid() + if pos < 0 { // Could be proved false if parser is Valid() + pos := 0; + } + if printPos != pos { + var line, lineStr, col := ExtractLineCol(input, pos); + print line, ": ", lineStr, "\n"; + print repeat_(" ", col + 2 + |intToString(line)|), "^","\n"; + } + print result.data.message; + if result.data.next.Some? { + print ", or\n"; + PrintFailure(input, Failure(result.level, result.data.next.value), pos); + } else { + print "\n"; + } + } +} \ No newline at end of file From 3be766e405765f94a10a1ba0f39c8d06d5696709 Mon Sep 17 00:00:00 2001 From: Mikael Mayer Date: Tue, 14 Nov 2023 16:34:23 -0600 Subject: [PATCH 11/22] Arithmetic builders DSL --- src/Parsers/exampleArithmetic.dfy | 140 ++++++------------ src/Parsers/examples/arithmeticBuilders.dfy | 52 +++++++ src/Parsers/library.dfy | 37 ----- src/Parsers/parsers.dfy | 53 ++++++- src/Parsers/parsersBuilders.dfy | 122 +++++++++++++++ src/Parsers/parsersDSL.dfy | 72 --------- src/Parsers/parsersDisplayers.dfy | 2 +- .../{parserTests.dfy => parsersTests.dfy} | 3 - src/Parsers/stringParsers.dfy | 22 ++- src/Parsers/stringParsersBuilders.dfy | 15 ++ 10 files changed, 304 insertions(+), 214 deletions(-) create mode 100644 src/Parsers/examples/arithmeticBuilders.dfy delete mode 100644 src/Parsers/library.dfy create mode 100644 src/Parsers/parsersBuilders.dfy delete mode 100644 src/Parsers/parsersDSL.dfy rename src/Parsers/{parserTests.dfy => parsersTests.dfy} (99%) create mode 100644 src/Parsers/stringParsersBuilders.dfy diff --git a/src/Parsers/exampleArithmetic.dfy b/src/Parsers/exampleArithmetic.dfy index f9e7456c..12e37248 100644 --- a/src/Parsers/exampleArithmetic.dfy +++ b/src/Parsers/exampleArithmetic.dfy @@ -1,93 +1,9 @@ -include "stringParsers.dfy" +include "stringParsersBuilders.dfy" -module ArithmeticParser { +module PolynomialParser { import opened StringParsers - import opened StringNat - // Pure functional style - const parser: Parser - := ConcatL( - RecursiveMap( - map[ - "atom" := - RecursiveDef(0, (callback: ParserSelector) => - Or(ConcatR( - String("("), ConcatL( - callback("term"), - String(")"))), - Or( - Map(Int(), (result: int) => Number(result)), ConcatR( - String("x"), - Map(Maybe(ConcatR( - String("^"), Int())), - (result: Option) => - if result.Some? then Unknown(result.value) else Unknown(1) - ))))), - "factor" := - RecursiveDef(1, (callback: ParserSelector) => - Bind(callback("atom"), (atom: Expression) => - Rep( - Concat(Or(String("*"), Or(String("/"), String("%"))), - callback("atom")), - Expression.InfixBuilder(), atom) - ) - ), - "term" := - RecursiveDef(2, (callback: ParserSelector) => - Bind(callback("factor"), (factor: Expression) => - Rep( - Concat(Or(String("+"), String("-")), - callback("factor")), - Expression.InfixBuilder(), factor) - ) - ) - ], - "term" - ), EndOfString()) - - // DSL style - const parserDSL: Parser - := ConcatL( - RecursiveMap( - map[ - "atom" := - RecursiveDef(0, (callback: ParserSelector) => - Or(ConcatR( - String("("), ConcatL( - callback("term"), - String(")"))), - Or( - Map(Int(), (result: int) => Number(result)), ConcatR( - String("x"), - Map(Maybe(ConcatR( - String("^"), Int())), - (result: Option) => - if result.Some? then Unknown(result.value) else Unknown(1) - ))))), - "factor" := - RecursiveDef(1, (callback: ParserSelector) => - Bind(callback("atom"), (atom: Expression) => - Rep( - Concat(Or(String("*"), Or(String("/"), String("%"))), - callback("atom")), - Expression.InfixBuilder(), atom) - ) - ), - "term" := - RecursiveDef(2, (callback: ParserSelector) => - Bind(callback("factor"), (factor: Expression) => - Rep( - Concat(Or(String("+"), String("-")), - callback("factor")), - Expression.InfixBuilder(), factor) - ) - ) - ], - "term" - ), EndOfString()) - - - type Result = StringParsers.Wrappers.Result + type Result = Wrappers.Result datatype Expression = | Binary(op: string, left: Expression, right: Expression) @@ -132,22 +48,58 @@ module ArithmeticParser { function ToString(): string { match this - case Number(x) => (if x < 0 then "-" else "") + StringNat.natToString(if x < 0 then -x else x) + case Number(x) => (if x < 0 then "-" else "") + StringParsers.intToString(if x < 0 then -x else x) case Binary(op, left, right) => "(" + left.ToString() + op + right.ToString() + ")" case Unknown(power) => if power == 1 then "x" else if power == 0 then "1" else - if power < 0 then "x^(-" + StringNat.natToString(0-power)+")" else - "x^" + StringNat.natToString(power) + if power < 0 then "x^(-" + StringParsers.intToString(0-power)+")" else + "x^" + StringParsers.intToString(power) } } - const buildParsedExpr: ((Expression, Wrappers.Option<(string, Expression)>)) -> Expression - := ((result: (Expression, Wrappers.Option<(string, Expression)>)) => - if result.1.None? then result.0 else - Binary(result.1.value.0, result.0, result.1.value.1)) + // Pure functional style + const parser: Parser + := ConcatL( + RecursiveMap( + map[ + "atom" := + RecursiveDef(0, (callback: ParserSelector) => + Or(ConcatR( + String("("), ConcatL( + callback("term"), + String(")"))), + Or( + Map(Int(), (result: int) => Number(result)), ConcatR( + String("x"), + Map(Maybe(ConcatR( + String("^"), Int())), + (result: Option) => + if result.Some? then Unknown(result.value) else Unknown(1) + ))))), + "factor" := + RecursiveDef(1, (callback: ParserSelector) => + Bind(callback("atom"), (atom: Expression) => + Rep( + Concat(Or(String("*"), Or(String("/"), String("%"))), + callback("atom")), + Expression.InfixBuilder(), atom) + ) + ), + "term" := + RecursiveDef(2, (callback: ParserSelector) => + Bind(callback("factor"), (factor: Expression) => + Rep( + Concat(Or(String("+"), String("-")), + callback("factor")), + Expression.InfixBuilder(), factor) + ) + ) + ], + "term" + ), EndOfString()) method Main(args: seq) { if |args| <= 1 { diff --git a/src/Parsers/examples/arithmeticBuilders.dfy b/src/Parsers/examples/arithmeticBuilders.dfy new file mode 100644 index 00000000..c74b49c6 --- /dev/null +++ b/src/Parsers/examples/arithmeticBuilders.dfy @@ -0,0 +1,52 @@ +include "../stringParsersBuilders.dfy" + + +module PolynomialParsersBuilder { + import opened StringParsersBuilders + + type Result = StringParsersBuilders.P.Wrappers.Result + + datatype Expression = + | Binary(op: string, left: Expression, right: Expression) + | Number(value: int) + | Unknown(power: int) + { + static function InfixBuilder(): (Expression, (string, Expression)) -> Expression + { + (left: Expression, right: (string, Expression)) => Binary(right.0, left, right.1) + } + } + + // DSL style + const parserDSL: B + := + Rec( + map[ + "atom" := + RecDef(0, (c: Sel) => + O([ + String("(").e_I(c("term")).I_e(String(")")), + Int().M((result: int) => Number(result)), + String("x").e_I(String("^").e_I(Int()).Maybe().M( + (result: StringParsersBuilders.P.Option) => + if result.Some? then Unknown(result.value) else Unknown(1))) + ])), + "factor" := + RecDef(1, (c: Sel) => + c("atom").Then((atom: Expression) => // TODO: Finish this one + O([ + String("*"), + String("/"), + String("%") + ]).I_I(c("atom")).Rep(atom, Expression.InfixBuilder()))), + "term" := + RecDef(1, (c: Sel) => + c("factor").Then((atom: Expression) => + O([ + String("+"), + String("-") + ]).I_I(c("factor")).Rep(atom, Expression.InfixBuilder()))) + ], + "term" + ).I_e(End()) +} \ No newline at end of file diff --git a/src/Parsers/library.dfy b/src/Parsers/library.dfy deleted file mode 100644 index e21500c2..00000000 --- a/src/Parsers/library.dfy +++ /dev/null @@ -1,37 +0,0 @@ -module StringNat { - predicate IsStringNat(s: string) { - |s| > 0 && (|s| > 1 ==> s[0] != '0') && - forall i | 0 <= i < |s| :: s[i] in "0123456789" - } - - type stringNat = s: string | - IsStringNat(s) - witness "1" - - function natToString(n: nat): stringNat { - match n - case 0 => "0" case 1 => "1" case 2 => "2" case 3 => "3" case 4 => "4" - case 5 => "5" case 6 => "6" case 7 => "7" case 8 => "8" case 9 => "9" - case _ => natToString(n / 10) + natToString(n % 10) - } - - function stringToNat(s: stringNat): nat - decreases |s| - { - if |s| == 1 then - match s[0] - case '0' => 0 case '1' => 1 case '2' => 2 case '3' => 3 case '4' => 4 - case '5' => 5 case '6' => 6 case '7' => 7 case '8' => 8 case '9' => 9 - else - stringToNat(s[..|s|-1])*10 + stringToNat(s[|s|-1..|s|]) - } - - lemma natToStringThenStringToNatIdem(n: nat) - ensures stringToNat(natToString(n)) == n - { // Proof is automatic - } - lemma stringToNatThenNatToStringIdem(n: stringNat) - ensures natToString(stringToNat(n)) == n - { // Proof is automatic - } -} \ No newline at end of file diff --git a/src/Parsers/parsers.dfy b/src/Parsers/parsers.dfy index 693ef248..9776c411 100644 --- a/src/Parsers/parsers.dfy +++ b/src/Parsers/parsers.dfy @@ -1,5 +1,4 @@ include "../Wrappers.dfy" -include "library.dfy" abstract module Parsers // Functional parsers consuming sequences seq from the left to the right. @@ -7,6 +6,52 @@ abstract module Parsers { import Wrappers + export + provides C, // The character type + Wrappers, // Imported module + Valid, + Succeed, + Epsilon, + Fail, + EndOfString, + Bind, + BindSucceeds, + BindResult, + Map, + Not, + And, + Or, + OrSeq, + Lookahead, + ?, + If, + Maybe, + ConcatMap, + Concat, + ConcatL, + ConcatR, + RepSeq, + Rep, + Recursive, + RecursiveMap, + Debug, + intToString, + digitToInt, + stringToInt, + ParseResult.IsFailure, + ParseResult.PropagateFailure, + ParseResult.Extract + reveals + Parser, + ParserSelector, + Option, // From Wrappers + FailureLevel, + ParseResult, + FailureData, + RecursiveDef + + export All reveals * + type C(!new, ==) // The character of the sequence being parsed @@ -509,7 +554,7 @@ abstract module Parsers return message; } - opaque function DebugParser(msg: string, other: Parser): (p: Parser) + opaque function Debug(msg: string, other: Parser): (p: Parser) // A parser that, when invoked, will print a message before applying its underlying parser // and also afterwards { @@ -521,6 +566,7 @@ abstract module Parsers } opaque function intToString(n: int): string + // Converts an integer to a string decreases if n < 0 then 1 - n else n { if n < 0 then "-" + intToString(-n) else @@ -534,10 +580,11 @@ abstract module Parsers match c case '0' => 0 case '1' => 1 case '2' => 2 case '3' => 3 case '4' => 4 case '5' => 5 case '6' => 6 case '7' => 7 case '8' => 8 case '9' => 9 - case _ => 0 + case _ => -1 } opaque function stringToInt(s: string): int + // Converts a string to a string decreases |s| { if |s| == 0 then 0 else diff --git a/src/Parsers/parsersBuilders.dfy b/src/Parsers/parsersBuilders.dfy new file mode 100644 index 00000000..bd4557fe --- /dev/null +++ b/src/Parsers/parsersBuilders.dfy @@ -0,0 +1,122 @@ +include "parsers.dfy" + +// Nice wanna-to-be DSL to build parsers to avoid too much parenthesis nesting +// B(p) returns a parser builder from a normal parser. +// B1.o_I(B2) will parse both but return the result of B2 +// B1.I_o(B2) will parse both but return the result of B1 +// B.M(f) will map the result of the parser builder by f if succeeded +// B1.O(B2) will either parse B1, or B2 if B1 fails with Recoverable +// FirstOf([B1, B2, B3]) +// will parse with B1, but if B1 fails with Recoverable, +// it will parse with B2, but if B2 fails with Recoverable, +// it will parse with B3 +// R(v) returns a parser builder that returns immediately v +// +// There are more parser builders in the trait Engine, when their spec depends on +// a predetermined input, e.g. to tests for constant strings + +abstract module ParserBuilders { + import P: Parsers + export + provides P + provides O + provides Ok + provides Fail + provides Rec + provides B.e_I + provides B.I_e + provides B.I_I + provides B.M + provides B.Maybe + provides B.Then + provides B.Rep + provides End + reveals B + reveals RecDef, FailureLevel, Sel + + type FailureLevel = P.FailureLevel + type Sel = string -> B + + // Wrap the constructor in a class where the size is constant so that users + // don'result need to provide it. + datatype B = B(apply: P.Parser) + { + function Maybe(): B> { + B(P.Maybe(apply)) + } + function e_I(other: B): (p: B) + // Excludes the left, includes the right + { + B(P.ConcatR(apply, other.apply)) + } + function I_e(other: B): (p: B) + // Includes the left, excludes the right + { + B(P.ConcatL(apply, other.apply)) + } + function I_I(other: B): (p: B<(R, U)>) + // Includes the left, excludes the right + { + B(P.Concat(apply, other.apply)) + } + function M(mappingFunc: R -> U): (p: B) + // Maps the result + { + B(P.Map(apply, mappingFunc)) + } + function Then(other: R -> B): (p: B) + { + B(P.Bind(apply, (result: R) => other(result).apply)) + } + + function Rep(init: A, combine: (A, R) -> A): (p: B) + { + B(P.Rep(apply, combine, init)) + } + } + + function Ok(result: R): (p: B) + { + B(P.Succeed(result)) + } + + function Fail(message: string, level: FailureLevel := FailureLevel.Recoverable): (p: B) + { + B(P.Fail(message, level)) + } + + function O(alternatives: seq>): B + // Declares a set of alternatives as a single list + { + if |alternatives| == 0 then Fail("no alternative") else + if |alternatives| == 1 then alternatives[0] + else + B(P.Or(alternatives[0].apply, O(alternatives[1..]).apply)) + } + + function End(): B<()> + { + B(P.EndOfString()) + } + + datatype RecDef = RecDef( + order: nat, + definition: Sel -> B) + + opaque function Rec( + underlying: map>, + fun: string): (p: B) + { + B(P.RecursiveMap( + map k <- underlying :: k := + P.RecursiveDef( + underlying[k].order, + (selector: P.ParserSelector) => + underlying[k].definition( + (name: string) => + B(selector(name)) + ).apply), + fun + )) + } +} diff --git a/src/Parsers/parsersDSL.dfy b/src/Parsers/parsersDSL.dfy deleted file mode 100644 index 1de561ae..00000000 --- a/src/Parsers/parsersDSL.dfy +++ /dev/null @@ -1,72 +0,0 @@ -include "parsers.dfy" - -// Nice wanna-to-be DSL to build parsers to avoid too much parenthesis nesting -// B(p) returns a parser builder from a normal parser. -// B1.o_I(B2) will parse both but return the result of B2 -// B1.I_o(B2) will parse both but return the result of B1 -// B.M(f) will map the result of the parser builder by f if succeeded -// B1.O(B2) will either parse B1, or B2 if B1 fails with Recoverable -// FirstOf([B1, B2, B3]) -// will parse with B1, but if B1 fails with Recoverable, -// it will parse with B2, but if B2 fails with Recoverable, -// it will parse with B3 -// R(v) returns a parser builder that returns immediately v -// -// There are more parser builders in the trait Engine, when their spec depends on -// a predetermined input, e.g. to tests for constant strings - -abstract module ParserBuilders { - import Parsers - - type Parser = Parsers.Parser - type FailureLevel = Parsers.FailureLevel - - // Wrap the constructor in a class where the size is constant so that users - // don'result need to provide it. - datatype B = B(apply: Parser) - { - opaque function e_I(other: B): (p: B) - // Excludes the left, includes the right - { - B(Parsers.ConcatR(apply, other.apply)) - } - opaque function I_o(other: B): (p: B) - // Includes the left, excludes the right - { - B(Parsers.ConcatL(apply, other.apply)) - } - opaque function M(mappingFunc: R -> U): (p: B) - // Maps the result - { - B(Parsers.Map(apply, mappingFunc)) - } - static function BS(result: R): (p: B) - { - B(Parsers.Succeed(result)) - } - - static function BF(message: string, level: FailureLevel := FailureLevel.Recoverable): (p: B) - { - B(Parsers.Fail(message, level)) - } - - static function O(alternatives: seq>): B - // Declares a set of alternatives as a single list - { - if |alternatives| == 0 then BF("no alternative") else - if |alternatives| == 1 then alternatives[0] - else - B(Parsers.Or(alternatives[0].apply, O(alternatives[1..]).apply)) - } - - opaque function Then(other: R -> B): (p: B) - { - B(Parsers.Bind(apply, (result: R) => other(result).apply)) - } - - opaque function Rep(init: R, combine: (R, R) -> R): (p: B) - { - B(Parsers.Rep(apply, combine, init)) - } - } -} diff --git a/src/Parsers/parsersDisplayers.dfy b/src/Parsers/parsersDisplayers.dfy index caa245fe..b1838a17 100644 --- a/src/Parsers/parsersDisplayers.dfy +++ b/src/Parsers/parsersDisplayers.dfy @@ -3,7 +3,7 @@ include "parsers.dfy" // From these parsers, we can create displayers // and prove the roundtrip displayer / parser if we wanted to abstract module ParsersDiplayers { - import Parsers + import Parsers`All type Parser = Parsers.Parser type C = Parsers.C diff --git a/src/Parsers/parserTests.dfy b/src/Parsers/parsersTests.dfy similarity index 99% rename from src/Parsers/parserTests.dfy rename to src/Parsers/parsersTests.dfy index 70f18e42..4fc98360 100644 --- a/src/Parsers/parserTests.dfy +++ b/src/Parsers/parsersTests.dfy @@ -1,9 +1,6 @@ include "parsers.dfy" -//include "../NonLinearArithmetic/DivMod.dfy" abstract module ParserTests refines Parsers { - //import DivMod - lemma AboutSucceed(result: R, input: seq) ensures var p := Succeed(result); diff --git a/src/Parsers/stringParsers.dfy b/src/Parsers/stringParsers.dfy index baf85187..7aa7c45e 100644 --- a/src/Parsers/stringParsers.dfy +++ b/src/Parsers/stringParsers.dfy @@ -1,6 +1,20 @@ include "parsers.dfy" module StringParsers refines Parsers { + export StringParsers extends Parsers + provides + CharTest, + Char, + Digit, + DigitNumber, + Nat, + Int, + String, + ExtractLineCol, + PrintFailure, + Wrappers + reveals C + type C = char // ################################## @@ -33,10 +47,10 @@ module StringParsers refines Parsers { // A parser that returns the current char as a number if it is one { Map(Digit(), (c: char) => - var n: nat := (if StringNat.IsStringNat([c]) then // Should always be true - StringNat.stringToNat([c]) - else 0); n - ) + var d := digitToInt(c); + var n: nat := if d >= 0 then d else 0; + n + ) } opaque function Nat(): (p: Parser) diff --git a/src/Parsers/stringParsersBuilders.dfy b/src/Parsers/stringParsersBuilders.dfy new file mode 100644 index 00000000..e6cbb8ab --- /dev/null +++ b/src/Parsers/stringParsersBuilders.dfy @@ -0,0 +1,15 @@ +include "stringParsers.dfy" +include "parsersBuilders.dfy" + +module StringParsersBuilders refines ParserBuilders { + import P = StringParsers + export StringParsersBuilders extends ParserBuilders + provides String, Int + + function String(s: string): B { + B(P.String(s)) + } + function Int(): B { + B(P.Int()) + } +} \ No newline at end of file From 7502ff839a89b06c6fd142fce44ec22faf83626f Mon Sep 17 00:00:00 2001 From: Mikael Mayer Date: Wed, 15 Nov 2023 00:43:38 -0600 Subject: [PATCH 12/22] Better readiness for review --- src/Parsers/README.md | 136 +++++++++--------- src/Parsers/exampleArithmetic.dfy | 129 ----------------- src/Parsers/examples/Tutorial.dfy | 81 +++++++++++ src/Parsers/examples/arithmeticBuilders.dfy | 52 ------- src/Parsers/examples/polynomialParser.dfy | 119 +++++++++++++++ .../examples/polynomialParserBuilder.dfy | 112 +++++++++++++++ src/Parsers/parsers.dfy | 18 +++ src/Parsers/parsersBuilders.dfy | 22 ++- src/Parsers/stringParsers.dfy | 15 +- 9 files changed, 421 insertions(+), 263 deletions(-) delete mode 100644 src/Parsers/exampleArithmetic.dfy create mode 100644 src/Parsers/examples/Tutorial.dfy delete mode 100644 src/Parsers/examples/arithmeticBuilders.dfy create mode 100644 src/Parsers/examples/polynomialParser.dfy create mode 100644 src/Parsers/examples/polynomialParserBuilder.dfy diff --git a/src/Parsers/README.md b/src/Parsers/README.md index 846190a0..cd72f5c0 100644 --- a/src/Parsers/README.md +++ b/src/Parsers/README.md @@ -1,84 +1,92 @@ -# Verified Parser Combinator Library in Dafny +# Verified Parser Combinators + +Parser combinators in Dafny, inspired from the model (Meijer 1996). + +This library offers two styles of functional parser combinators. + +- The first parsers style is a synonym for `seq -> ParseResult` that supports monadic styles, is straightforward to use, but results in lots of closing parentheses. + +- The second parsers style is a datatype wrapper around the first style, which enable to define functions as infix or suffixes, which makes parsers sometimes easier to read and helps decreasing nesting. + +## Library usage + +The tutorial in [`Tutorial.dfy`](examples/Tutorial.dfy) shows how to import the library call the two parsers style API, apply the parser to a string, and also use the PrintFailure to pretty print the failure along with the line/col where it occurred. To view a full example of how to use the parser combinator library, especially how to define a recursive parser that is guaranteed to terminate, -please refer to the file `exampleArithmetic.dfy`, which parses -an arithmetic expression. +please refer to the files [`polynomialParser.dfy`](examples/polynomialParser.dfy) and [`polynomialParserBuilders.dfy`](examples/polynomialParserBuilder.dfy), which both parse polynomial expressions. -To get started, first you need to import the parser, I recommend: +As a quick walkthrough, here is a test to parse a Tic-tac-toe grid: ``` -include "parser.dfy" -import opened Parsers -import ParserEngine +method {:test} TestTicTacToe() { + var x := OrSeq([ + String("O"), String("X"), String(" ") + ]); + var v := String("|"); + var row := Concat(x, ConcatR(v, Concat(x, ConcatR(v, x)))); + var sep := String("\n-+-+-\n"); + var grid := + Concat(row, ConcatR(sep, Concat(row, ConcatR(sep, row)))); + var input := "O|X| \n-+-+-\nX|O| \n-+-+-\nP| |O"; + // 012345 678901 234567 890123 45678 + var r := grid(input); + expect r.IsFailure(); + PrintFailure(input, r); +} ``` - -Then, define a class that extends `ParserEngine.Engine` and defines -the input string at the same time: +it displays the following: ``` - class MyParserEngine extends ParserEngine.Engine { - constructor(input: string) { - this.input := input; - } - } +Error: +5: P| |O + ^ +expected 'O', or +expected 'X', or +expected ' ' ``` +## What is verified? -A parser is a partial function that takes a position and returns a `ParseResult`. Errors have two levels, recoverable or not. +Despite combinators enabling to define mutually recursive parsers (`RecursiveMap`, `Recursive`), Dafny will always check termination. When using recursive combinators, termination is checked at run-time so it does not prevent quick prototyping and iterations, and error messages about non-termination are always precise (either the ordering, or the progression). -> ``` -> type Parser<+T> = nat --> ParseResult -> -> datatype ParseResult<+T> = -> | PFailure(level: FailureLevel, message: string, pos: nat) -> | PSuccess(pos: nat, t: T) -> -> datatype FailureLevel = Error | Recoverable -> ``` +This library offers a predicate on parsers of the first style `Valid()`, which +indicates that such parsers will never raise a fatal result, and will always return a +string that is suffix of the string they are given as input. Many combinators have +a proof that, if their inputs are Valid(), then their result is Valid(). +Checking validity statically could help design parsers that do even less checks at run-time, but it has not been developed in this library. -In this class, you can define parsers yourself, or use building blocks. -For example, +This library also offers a dual type to parser, named Displayer, which is `(Result, seq) -> seq`. It only defines the dual of the Concat parser combinator and proves the roundtrip to be the identity. Because Dafny does not offer +compilable predicate to check that a datatype constructor is included in another one, +writing combinators for this kind of parser dual is difficult. -``` - function method ParseId?(): Parser - { - While?((pos: nat) requires pos <= |input| => - pos < |input| && input[pos] in "azertyuiopqsdfghjklmwxcvbnAZERTYUIOPQSDFGHJKLMWXCVBN_7894561230" - ) - } - - function method ParseId(): Parser - { - Or(ParseId?(), Fail("Expected identifier", Error)) - } - - function method ParseField(): Parser - { - Bind(Concat(ParseId(), ConcatR(Const(":"), ParseId())), - (result: (ID, ID), newPos: nat) => Succeed(Field(result.0, result.1))) - } - // It's the same as using Map() instead of Bind(), and removing the "Succeed(" and the `newPos` parameter) - - datatype FieldDeclaration = Field(name: ID, value: ID) -``` +## Relationship to JSON parsers -To invoke your function, define a main method like this: +The JSON parser is very specialized and the type of the parsers combinators it is using is actually a subset type. +Subset types are known to be a source of proof brittleness, +so this library design is not using subset types. +That said, it is possible to create an adapter around a JSON parser to make it a parser of this library. -``` +# Caveats -method Main() { - var content = "happy:code"; - - var t := new MyParserEngine(content); - var parseResult := t.ParseField()(0); - if parseResult.PFailure? { - t.ReportError(parseResult); // Nice error reporting message with quoting the line and position of failure, along with the message - return; - } - var result := parseResult.t; - print result; -} -``` +- Recursive parsers will consume stack and, in programming languages that have a finite amount of stack, programs can get out of memory. Prefer `Rep` and `RepSeq` as much as possible as they are tail-recursive. + +# Implementation notes + +The module hierarchy is as follow: +``` +abstract module Parsers { + type C +} +module StringParsers { + type C: Char +} +abstract module ParsersBuilders { + import P: Parsers +} +module StringParsersBuilders { + import P = StringParsers +} +``` \ No newline at end of file diff --git a/src/Parsers/exampleArithmetic.dfy b/src/Parsers/exampleArithmetic.dfy deleted file mode 100644 index 12e37248..00000000 --- a/src/Parsers/exampleArithmetic.dfy +++ /dev/null @@ -1,129 +0,0 @@ -include "stringParsersBuilders.dfy" - -module PolynomialParser { - import opened StringParsers - - type Result = Wrappers.Result - - datatype Expression = - | Binary(op: string, left: Expression, right: Expression) - | Number(value: int) - | Unknown(power: int) - { - - function Simplify(): Result { - match this { - case Number(x: int) => Result.Success(this) - case Binary(op, left, right) => - var l :- left.Simplify(); - var r :- right.Simplify(); - if l.Number? && r.Number? then - match op { - case "+" => Result.Success(Number(l.value + r.value)) - case "-" => Result.Success(Number(l.value - r.value)) - case "*" => Result.Success(Number(l.value * r.value)) - case "/" => - if r.value == 0 then Result.Failure("Division by zero (" + right.ToString() + " evaluates to zero)") else - Result.Success(Number(l.value / r.value)) - case "%" => - if r.value == 0 then Result.Failure("Modulo by zero (" + right.ToString() + " evaluates to zero)") else - Result.Success(Number(l.value % r.value)) - case _ => Result.Failure("Unsupported operator: " + op) - } - else - Result.Success(Binary(op, l, r)) - case Unknown(0) => Result.Success(Number(1)) - case Unknown(_) => - Result.Success(this) - } - } - static function BinaryBuilder(op: string): (Expression, Expression) -> Expression - { - (left: Expression, right: Expression) => Binary(op, left, right) - } - static function InfixBuilder(): (Expression, (string, Expression)) -> Expression - { - (left: Expression, right: (string, Expression)) => Binary(right.0, left, right.1) - } - function ToString(): string - { - match this - case Number(x) => (if x < 0 then "-" else "") + StringParsers.intToString(if x < 0 then -x else x) - case Binary(op, left, right) => - "(" - + left.ToString() + op + right.ToString() - + ")" - case Unknown(power) => - if power == 1 then "x" else if power == 0 then "1" else - if power < 0 then "x^(-" + StringParsers.intToString(0-power)+")" else - "x^" + StringParsers.intToString(power) - } - } - - // Pure functional style - const parser: Parser - := ConcatL( - RecursiveMap( - map[ - "atom" := - RecursiveDef(0, (callback: ParserSelector) => - Or(ConcatR( - String("("), ConcatL( - callback("term"), - String(")"))), - Or( - Map(Int(), (result: int) => Number(result)), ConcatR( - String("x"), - Map(Maybe(ConcatR( - String("^"), Int())), - (result: Option) => - if result.Some? then Unknown(result.value) else Unknown(1) - ))))), - "factor" := - RecursiveDef(1, (callback: ParserSelector) => - Bind(callback("atom"), (atom: Expression) => - Rep( - Concat(Or(String("*"), Or(String("/"), String("%"))), - callback("atom")), - Expression.InfixBuilder(), atom) - ) - ), - "term" := - RecursiveDef(2, (callback: ParserSelector) => - Bind(callback("factor"), (factor: Expression) => - Rep( - Concat(Or(String("+"), String("-")), - callback("factor")), - Expression.InfixBuilder(), factor) - ) - ) - ], - "term" - ), EndOfString()) - - method Main(args: seq) { - if |args| <= 1 { - return; - } - for i := 1 to |args| { - var input := args[i]; - match parser(input) { - case Success(result, remaining) => - if |remaining| != 0 { - print "'" + remaining +"'", "\n"; - PrintFailure(input, Failure(Recoverable, FailureData("Expected end of string", remaining, Option.None))); - } - print "Computation:", result.ToString(), "\n"; - match result.Simplify() { - case Success(x) => - print "Result:", x.ToString(), "\n"; - case Failure(message) => - print message; - } - case failure => - PrintFailure(input, failure); - } - print "\n"; - } - } -} \ No newline at end of file diff --git a/src/Parsers/examples/Tutorial.dfy b/src/Parsers/examples/Tutorial.dfy new file mode 100644 index 00000000..7a72cf1c --- /dev/null +++ b/src/Parsers/examples/Tutorial.dfy @@ -0,0 +1,81 @@ +include "../stringParsersBuilders.dfy" + +module Tutorial.Parsers { + import opened StringParsers + + method {:test} TestSplit1() { + var nonComma: Parser := + Many((c: char) => c != ',', "non-comma"); + var p := + Bind(nonComma, (result: string) => + Rep(ConcatR(String(","), nonComma), + (acc, elem) => acc + [elem], + [result] + )); + + expect p("abc,d,efg") == ParseResult.Success(["abc","d","efg"], ""); + expect p("abc,d,,") == + ParseResult.Failure(Recoverable, FailureData("expected a non-comma", ",", Option.None)); + PrintFailure("abc,d,,", p("abc,d,,")); + // Displays + // Error: + // 1: abc,d,, + // ^ + // expected a non-comma + } + + function flatten(): ((A, (A, A))) -> (A, A, A) { + (input: (A, (A, A))) => + (input.0, input.1.0, input.1.1) + } + + method {:test} TestTicTacToe() { + var x := OrSeq([ + String("O"), String("X"), String(" ") + ]); + var v := String("|"); + var row := Map(Concat(x, ConcatR(v, Concat(x, ConcatR(v, x)))), + flatten()); + var sep := String("\n-+-+-\n"); + var grid := Map( + Concat(row, ConcatR(sep, Concat(row, ConcatR(sep, row)))), + flatten<(string, string, string)>()) + ; + var input := "O|X| \n-+-+-\nX|O| \n-+-+-\nP| |O"; + // 012345 678901 234567 890123 45678 + var r := grid(input); + expect r.IsFailure(); + expect |input| - |r.data.remaining| == 24; + expect r.data.message == "expected 'O'"; + expect r.data.next.Some?; + expect r.data.next.value.message == "expected 'X'"; + expect r.data.next.value.next.Some?; + expect r.data.next.value.next.value.message == "expected ' '"; + expect r.data.next.value.next.value.next.None?; + PrintFailure(input, r); + // Displays: + // Error: + // 5: P| |O + // ^ + // expected 'O', or + // expected 'X', or + // expected ' ' + } +} + + +module Tutorial.ParsersBuilders { + import opened StringParsersBuilders + + method {:test} TestSplit1() { + var nonComma: B := + Many((c: char) => c != ',', "non-comma"); + var p := + nonComma.Bind((result: string) => + String(",").e_I(nonComma).Rep([result], + (acc: seq, elem: string) => acc + [elem] + )); + + expect p.apply("abc,d,efg") == P.ParseResult.Success(["abc","d","efg"], ""); + } +} \ No newline at end of file diff --git a/src/Parsers/examples/arithmeticBuilders.dfy b/src/Parsers/examples/arithmeticBuilders.dfy deleted file mode 100644 index c74b49c6..00000000 --- a/src/Parsers/examples/arithmeticBuilders.dfy +++ /dev/null @@ -1,52 +0,0 @@ -include "../stringParsersBuilders.dfy" - - -module PolynomialParsersBuilder { - import opened StringParsersBuilders - - type Result = StringParsersBuilders.P.Wrappers.Result - - datatype Expression = - | Binary(op: string, left: Expression, right: Expression) - | Number(value: int) - | Unknown(power: int) - { - static function InfixBuilder(): (Expression, (string, Expression)) -> Expression - { - (left: Expression, right: (string, Expression)) => Binary(right.0, left, right.1) - } - } - - // DSL style - const parserDSL: B - := - Rec( - map[ - "atom" := - RecDef(0, (c: Sel) => - O([ - String("(").e_I(c("term")).I_e(String(")")), - Int().M((result: int) => Number(result)), - String("x").e_I(String("^").e_I(Int()).Maybe().M( - (result: StringParsersBuilders.P.Option) => - if result.Some? then Unknown(result.value) else Unknown(1))) - ])), - "factor" := - RecDef(1, (c: Sel) => - c("atom").Then((atom: Expression) => // TODO: Finish this one - O([ - String("*"), - String("/"), - String("%") - ]).I_I(c("atom")).Rep(atom, Expression.InfixBuilder()))), - "term" := - RecDef(1, (c: Sel) => - c("factor").Then((atom: Expression) => - O([ - String("+"), - String("-") - ]).I_I(c("factor")).Rep(atom, Expression.InfixBuilder()))) - ], - "term" - ).I_e(End()) -} \ No newline at end of file diff --git a/src/Parsers/examples/polynomialParser.dfy b/src/Parsers/examples/polynomialParser.dfy new file mode 100644 index 00000000..ff9cb465 --- /dev/null +++ b/src/Parsers/examples/polynomialParser.dfy @@ -0,0 +1,119 @@ +include "../stringParsers.dfy" + +module PolynomialParser { + import opened P = StringParsers + + // Parser combinators style + const parser: Parser + := ConcatL(RecursiveMap(map[ + "atom" := RecursiveDef(0, (callback: ParserSelector) => + Or(ConcatR( + String("("), ConcatL( + callback("term"), + String(")"))), + Or( + Map(Int(), (result: int) => Number(result)), ConcatR( + String("x"), + Map(Maybe(ConcatR( + String("^"), Int())), + (result: Option) => + if result.Some? then Unknown(result.value) else Unknown(1) + ))))), + "factor" := RecursiveDef(1, (callback: ParserSelector) => + Bind(callback("atom"), (atom: Expr) => + Rep( + Concat(Or(String("*"), Or(String("/"), String("%"))), + callback("atom")), + Expr.InfixBuilder(), atom) + )), + + "term" := RecursiveDef(2, (callback: ParserSelector) => + Bind(callback("factor"), (factor: Expr) => + Rep( + Concat(Or(String("+"), String("-")), + callback("factor")), + Expr.InfixBuilder(), factor) + )) + ], "term"), EndOfString()) + + type Result = Wrappers.Result + + datatype Expr = + | Binary(op: string, left: Expr, right: Expr) + | Number(value: int) + | Unknown(power: int) + { + + function Simplify(): Result { + match this { + case Number(x: int) => Result.Success(this) + case Binary(op, left, right) => + var l :- left.Simplify(); + var r :- right.Simplify(); + if l.Number? && r.Number? then + match op { + case "+" => Result.Success(Number(l.value + r.value)) + case "-" => Result.Success(Number(l.value - r.value)) + case "*" => Result.Success(Number(l.value * r.value)) + case "/" => + if r.value == 0 then + Result.Failure("Division by zero (" + right.ToString() + + " evaluates to zero)") + else + Result.Success(Number(l.value / r.value)) + case "%" => + if r.value == 0 then + Result.Failure("Modulo by zero (" + right.ToString() + + " evaluates to zero)") + else + Result.Success(Number(l.value % r.value)) + case _ => Result.Failure("Unsupported operator: " + op) + } + else + Result.Success(Binary(op, l, r)) + case Unknown(0) => Result.Success(Number(1)) + case Unknown(_) => Result.Success(this) + } + } + + static function InfixBuilder(): (Expr, (string, Expr)) -> Expr + { + (left: Expr, right: (string, Expr)) => Binary(right.0, left, right.1) + } + + function ToString(): string { + match this + case Number(x) => P.intToString(x) + case Binary(op, left, right) => + "(" + + left.ToString() + op + right.ToString() + + ")" + case Unknown(power) => + if power == 1 then "x" else if power == 0 then "1" else + "x^" + P.intToString(power) + } + } + + method Main(args: seq) { + if |args| <= 1 { + print "Please provide a polynomial to parse as argument\n"; + return; + } + for i := 1 to |args| { + var input := args[i]; + match parser(input) { + case Success(result, remaining) => + print "Polynomial:", result.ToString(), "\n"; + match result.Simplify() { + case Success(x) => + print "Simplified:", x.ToString(), "\n"; + case Failure(message) => + print message; + } + case failure => + PrintFailure(input, failure); + } + print "\n"; + } + } +} \ No newline at end of file diff --git a/src/Parsers/examples/polynomialParserBuilder.dfy b/src/Parsers/examples/polynomialParserBuilder.dfy new file mode 100644 index 00000000..75e1a644 --- /dev/null +++ b/src/Parsers/examples/polynomialParserBuilder.dfy @@ -0,0 +1,112 @@ +include "../stringParsersBuilders.dfy" + + +module PolynomialParsersBuilder { + import opened StringParsersBuilders + + import P = StringParsersBuilders.P + + // PArsers builder style + const parser: B + := + Rec(map[ + "atom" := RecDef(0, (c: RecSel) => + O([ + String("(").e_I(c("term")).I_e(String(")")), + Int().M((result: int) => Number(result)), + String("x").e_I(String("^").e_I(Int()).Maybe().M( + (result: P.Option) => + if result.Some? then Unknown(result.value) else Unknown(1))) + ])), + + "factor" := RecDef(1, (c: RecSel) => + c("atom").Bind((atom: Expr) => // TODO: Finish this one + O([String("*"), String("/"), String("%")]) + .I_I(c("atom")).Rep(atom, Expr.InfixBuilder()))), + + "term" := RecDef(2, (c: RecSel) => + c("factor").Bind((atom: Expr) => + O([String("+"), String("-")]) + .I_I(c("factor")).Rep(atom, Expr.InfixBuilder()))) + ], "term") + .I_e(End()) + + type Result = StringParsersBuilders.P.Wrappers.Result + + datatype Expr = + | Binary(op: string, left: Expr, right: Expr) + | Number(value: int) + | Unknown(power: int) + { + + function Simplify(): Result { + match this { + case Number(x: int) => Result.Success(this) + case Binary(op, left, right) => + var l :- left.Simplify(); + var r :- right.Simplify(); + if l.Number? && r.Number? then + match op { + case "+" => Result.Success(Number(l.value + r.value)) + case "-" => Result.Success(Number(l.value - r.value)) + case "*" => Result.Success(Number(l.value * r.value)) + case "/" => + if r.value == 0 then + Result.Failure("Division by zero (" + right.ToString() + + " evaluates to zero)") + else + Result.Success(Number(l.value / r.value)) + case "%" => + if r.value == 0 then + Result.Failure("Modulo by zero (" + right.ToString() + + " evaluates to zero)") + else + Result.Success(Number(l.value % r.value)) + case _ => Result.Failure("Unsupported operator: " + op) + } + else + Result.Success(Binary(op, l, r)) + case Unknown(0) => Result.Success(Number(1)) + case Unknown(_) => Result.Success(this) + } + } + static function InfixBuilder(): (Expr, (string, Expr)) -> Expr + { + (left: Expr, right: (string, Expr)) => Binary(right.0, left, right.1) + } + function ToString(): string { + match this + case Number(x) => P.intToString(x) + case Binary(op, left, right) => + "(" + + left.ToString() + op + right.ToString() + + ")" + case Unknown(power) => + if power == 1 then "x" else if power == 0 then "1" else + "x^" + P.intToString(power) + } + } + + method Main(args: seq) { + if |args| <= 1 { + print "Please provide a polynomial to parse as argument\n"; + return; + } + for i := 1 to |args| { + var input := args[i]; + match parser.apply(input) { + case Success(result, remaining) => + print "Polynomial:", result.ToString(), "\n"; + match result.Simplify() { + case Success(x) => + print "Simplified:", x.ToString(), "\n"; + case Failure(message) => + print message; + } + case failure => + P.PrintFailure(input, failure); + } + print "\n"; + } + } +} \ No newline at end of file diff --git a/src/Parsers/parsers.dfy b/src/Parsers/parsers.dfy index 9776c411..b9caa7e5 100644 --- a/src/Parsers/parsers.dfy +++ b/src/Parsers/parsers.dfy @@ -34,6 +34,8 @@ abstract module Parsers Rep, Recursive, RecursiveMap, + Any, + Many, Debug, intToString, digitToInt, @@ -547,6 +549,22 @@ abstract module Parsers definitionFun(callback)(input) } + opaque function Any(test: C -> bool, name: string): (p: Parser) + // A parser that returns the current char if it passes the test + // Returns a recoverable error based on the name otherwise + { + (input: seq) => + if 0 < |input| && test(input[0]) then Success(input[0], input[1..]) + else Failure(Recoverable, + FailureData("expected a "+name, input, Option.None)) + } + + opaque function Many(test: C -> bool, name: string): (p: Parser>) + { + Bind(Any(test, name), (c: C) => + Rep(Any(test, name), (s: seq, c': C) => s + [c'], [c])) + } + function Debug_(message: string): string { message } by method { diff --git a/src/Parsers/parsersBuilders.dfy b/src/Parsers/parsersBuilders.dfy index bd4557fe..fdd5023e 100644 --- a/src/Parsers/parsersBuilders.dfy +++ b/src/Parsers/parsersBuilders.dfy @@ -28,14 +28,15 @@ abstract module ParserBuilders { provides B.I_I provides B.M provides B.Maybe - provides B.Then + provides B.Bind provides B.Rep provides End + provides Any, Many reveals B - reveals RecDef, FailureLevel, Sel + reveals RecDef, FailureLevel, RecSel type FailureLevel = P.FailureLevel - type Sel = string -> B + type RecSel = string -> B // Wrap the constructor in a class where the size is constant so that users // don'result need to provide it. @@ -64,7 +65,7 @@ abstract module ParserBuilders { { B(P.Map(apply, mappingFunc)) } - function Then(other: R -> B): (p: B) + function Bind(other: R -> B): (p: B) { B(P.Bind(apply, (result: R) => other(result).apply)) } @@ -98,10 +99,21 @@ abstract module ParserBuilders { { B(P.EndOfString()) } + + function Any(test: P.C -> bool, name: string): B + { + B(P.Any(test, name)) + } + + function Many(test: P.C -> bool, name: string): B> + { + B(P.Many(test, name)) + } + datatype RecDef = RecDef( order: nat, - definition: Sel -> B) + definition: RecSel -> B) opaque function Rec( underlying: map>, diff --git a/src/Parsers/stringParsers.dfy b/src/Parsers/stringParsers.dfy index 7aa7c45e..962d7488 100644 --- a/src/Parsers/stringParsers.dfy +++ b/src/Parsers/stringParsers.dfy @@ -3,7 +3,6 @@ include "parsers.dfy" module StringParsers refines Parsers { export StringParsers extends Parsers provides - CharTest, Char, Digit, DigitNumber, @@ -21,26 +20,16 @@ module StringParsers refines Parsers { // String-specific parser combinators // ################################## - opaque function CharTest(test: char -> bool, name: string): (p: Parser) - // A parser that returns the current char if it passes the test - // Returns a recoverable error based on the name otherwise - { - (input: string) => - if 0 < |input| && test(input[0]) then Success(input[0], input[1..]) - else Failure(Recoverable, - FailureData("expected a "+name, input, Option.None)) - } - opaque function Char(expectedChar: char): (p: Parser) // A parser that tests if the current char is the given expected char { - CharTest((c: char) => c == expectedChar, [expectedChar]) + Any((c: char) => c == expectedChar, [expectedChar]) } opaque function Digit(): (p: Parser) // A parser that tests if the current char is a digit and returns it { - CharTest(c => c in "0123456789", "digit") + Any(c => c in "0123456789", "digit") } opaque function DigitNumber(): (p: Parser) From f262f7747b8a4dcdd42edbc8423cbd72e100bed5 Mon Sep 17 00:00:00 2001 From: Mikael Mayer Date: Wed, 15 Nov 2023 09:14:43 -0600 Subject: [PATCH 13/22] Removed files to prepare renaming conventions --- src/Parsers/examples/Tutorial.dfy | 81 --- src/Parsers/examples/polynomialParser.dfy | 119 ---- .../examples/polynomialParserBuilder.dfy | 112 ---- src/Parsers/parsers.dfy | 615 ------------------ src/Parsers/parsersBuilders.dfy | 134 ---- src/Parsers/parsersDisplayers.dfy | 46 -- src/Parsers/parsersTests.dfy | 410 ------------ src/Parsers/stringParsers.dfy | 145 ----- src/Parsers/stringParsersBuilders.dfy | 15 - 9 files changed, 1677 deletions(-) delete mode 100644 src/Parsers/examples/Tutorial.dfy delete mode 100644 src/Parsers/examples/polynomialParser.dfy delete mode 100644 src/Parsers/examples/polynomialParserBuilder.dfy delete mode 100644 src/Parsers/parsers.dfy delete mode 100644 src/Parsers/parsersBuilders.dfy delete mode 100644 src/Parsers/parsersDisplayers.dfy delete mode 100644 src/Parsers/parsersTests.dfy delete mode 100644 src/Parsers/stringParsers.dfy delete mode 100644 src/Parsers/stringParsersBuilders.dfy diff --git a/src/Parsers/examples/Tutorial.dfy b/src/Parsers/examples/Tutorial.dfy deleted file mode 100644 index 7a72cf1c..00000000 --- a/src/Parsers/examples/Tutorial.dfy +++ /dev/null @@ -1,81 +0,0 @@ -include "../stringParsersBuilders.dfy" - -module Tutorial.Parsers { - import opened StringParsers - - method {:test} TestSplit1() { - var nonComma: Parser := - Many((c: char) => c != ',', "non-comma"); - var p := - Bind(nonComma, (result: string) => - Rep(ConcatR(String(","), nonComma), - (acc, elem) => acc + [elem], - [result] - )); - - expect p("abc,d,efg") == ParseResult.Success(["abc","d","efg"], ""); - expect p("abc,d,,") == - ParseResult.Failure(Recoverable, FailureData("expected a non-comma", ",", Option.None)); - PrintFailure("abc,d,,", p("abc,d,,")); - // Displays - // Error: - // 1: abc,d,, - // ^ - // expected a non-comma - } - - function flatten(): ((A, (A, A))) -> (A, A, A) { - (input: (A, (A, A))) => - (input.0, input.1.0, input.1.1) - } - - method {:test} TestTicTacToe() { - var x := OrSeq([ - String("O"), String("X"), String(" ") - ]); - var v := String("|"); - var row := Map(Concat(x, ConcatR(v, Concat(x, ConcatR(v, x)))), - flatten()); - var sep := String("\n-+-+-\n"); - var grid := Map( - Concat(row, ConcatR(sep, Concat(row, ConcatR(sep, row)))), - flatten<(string, string, string)>()) - ; - var input := "O|X| \n-+-+-\nX|O| \n-+-+-\nP| |O"; - // 012345 678901 234567 890123 45678 - var r := grid(input); - expect r.IsFailure(); - expect |input| - |r.data.remaining| == 24; - expect r.data.message == "expected 'O'"; - expect r.data.next.Some?; - expect r.data.next.value.message == "expected 'X'"; - expect r.data.next.value.next.Some?; - expect r.data.next.value.next.value.message == "expected ' '"; - expect r.data.next.value.next.value.next.None?; - PrintFailure(input, r); - // Displays: - // Error: - // 5: P| |O - // ^ - // expected 'O', or - // expected 'X', or - // expected ' ' - } -} - - -module Tutorial.ParsersBuilders { - import opened StringParsersBuilders - - method {:test} TestSplit1() { - var nonComma: B := - Many((c: char) => c != ',', "non-comma"); - var p := - nonComma.Bind((result: string) => - String(",").e_I(nonComma).Rep([result], - (acc: seq, elem: string) => acc + [elem] - )); - - expect p.apply("abc,d,efg") == P.ParseResult.Success(["abc","d","efg"], ""); - } -} \ No newline at end of file diff --git a/src/Parsers/examples/polynomialParser.dfy b/src/Parsers/examples/polynomialParser.dfy deleted file mode 100644 index ff9cb465..00000000 --- a/src/Parsers/examples/polynomialParser.dfy +++ /dev/null @@ -1,119 +0,0 @@ -include "../stringParsers.dfy" - -module PolynomialParser { - import opened P = StringParsers - - // Parser combinators style - const parser: Parser - := ConcatL(RecursiveMap(map[ - "atom" := RecursiveDef(0, (callback: ParserSelector) => - Or(ConcatR( - String("("), ConcatL( - callback("term"), - String(")"))), - Or( - Map(Int(), (result: int) => Number(result)), ConcatR( - String("x"), - Map(Maybe(ConcatR( - String("^"), Int())), - (result: Option) => - if result.Some? then Unknown(result.value) else Unknown(1) - ))))), - "factor" := RecursiveDef(1, (callback: ParserSelector) => - Bind(callback("atom"), (atom: Expr) => - Rep( - Concat(Or(String("*"), Or(String("/"), String("%"))), - callback("atom")), - Expr.InfixBuilder(), atom) - )), - - "term" := RecursiveDef(2, (callback: ParserSelector) => - Bind(callback("factor"), (factor: Expr) => - Rep( - Concat(Or(String("+"), String("-")), - callback("factor")), - Expr.InfixBuilder(), factor) - )) - ], "term"), EndOfString()) - - type Result = Wrappers.Result - - datatype Expr = - | Binary(op: string, left: Expr, right: Expr) - | Number(value: int) - | Unknown(power: int) - { - - function Simplify(): Result { - match this { - case Number(x: int) => Result.Success(this) - case Binary(op, left, right) => - var l :- left.Simplify(); - var r :- right.Simplify(); - if l.Number? && r.Number? then - match op { - case "+" => Result.Success(Number(l.value + r.value)) - case "-" => Result.Success(Number(l.value - r.value)) - case "*" => Result.Success(Number(l.value * r.value)) - case "/" => - if r.value == 0 then - Result.Failure("Division by zero (" + right.ToString() - + " evaluates to zero)") - else - Result.Success(Number(l.value / r.value)) - case "%" => - if r.value == 0 then - Result.Failure("Modulo by zero (" + right.ToString() - + " evaluates to zero)") - else - Result.Success(Number(l.value % r.value)) - case _ => Result.Failure("Unsupported operator: " + op) - } - else - Result.Success(Binary(op, l, r)) - case Unknown(0) => Result.Success(Number(1)) - case Unknown(_) => Result.Success(this) - } - } - - static function InfixBuilder(): (Expr, (string, Expr)) -> Expr - { - (left: Expr, right: (string, Expr)) => Binary(right.0, left, right.1) - } - - function ToString(): string { - match this - case Number(x) => P.intToString(x) - case Binary(op, left, right) => - "(" - + left.ToString() + op + right.ToString() - + ")" - case Unknown(power) => - if power == 1 then "x" else if power == 0 then "1" else - "x^" + P.intToString(power) - } - } - - method Main(args: seq) { - if |args| <= 1 { - print "Please provide a polynomial to parse as argument\n"; - return; - } - for i := 1 to |args| { - var input := args[i]; - match parser(input) { - case Success(result, remaining) => - print "Polynomial:", result.ToString(), "\n"; - match result.Simplify() { - case Success(x) => - print "Simplified:", x.ToString(), "\n"; - case Failure(message) => - print message; - } - case failure => - PrintFailure(input, failure); - } - print "\n"; - } - } -} \ No newline at end of file diff --git a/src/Parsers/examples/polynomialParserBuilder.dfy b/src/Parsers/examples/polynomialParserBuilder.dfy deleted file mode 100644 index 75e1a644..00000000 --- a/src/Parsers/examples/polynomialParserBuilder.dfy +++ /dev/null @@ -1,112 +0,0 @@ -include "../stringParsersBuilders.dfy" - - -module PolynomialParsersBuilder { - import opened StringParsersBuilders - - import P = StringParsersBuilders.P - - // PArsers builder style - const parser: B - := - Rec(map[ - "atom" := RecDef(0, (c: RecSel) => - O([ - String("(").e_I(c("term")).I_e(String(")")), - Int().M((result: int) => Number(result)), - String("x").e_I(String("^").e_I(Int()).Maybe().M( - (result: P.Option) => - if result.Some? then Unknown(result.value) else Unknown(1))) - ])), - - "factor" := RecDef(1, (c: RecSel) => - c("atom").Bind((atom: Expr) => // TODO: Finish this one - O([String("*"), String("/"), String("%")]) - .I_I(c("atom")).Rep(atom, Expr.InfixBuilder()))), - - "term" := RecDef(2, (c: RecSel) => - c("factor").Bind((atom: Expr) => - O([String("+"), String("-")]) - .I_I(c("factor")).Rep(atom, Expr.InfixBuilder()))) - ], "term") - .I_e(End()) - - type Result = StringParsersBuilders.P.Wrappers.Result - - datatype Expr = - | Binary(op: string, left: Expr, right: Expr) - | Number(value: int) - | Unknown(power: int) - { - - function Simplify(): Result { - match this { - case Number(x: int) => Result.Success(this) - case Binary(op, left, right) => - var l :- left.Simplify(); - var r :- right.Simplify(); - if l.Number? && r.Number? then - match op { - case "+" => Result.Success(Number(l.value + r.value)) - case "-" => Result.Success(Number(l.value - r.value)) - case "*" => Result.Success(Number(l.value * r.value)) - case "/" => - if r.value == 0 then - Result.Failure("Division by zero (" + right.ToString() - + " evaluates to zero)") - else - Result.Success(Number(l.value / r.value)) - case "%" => - if r.value == 0 then - Result.Failure("Modulo by zero (" + right.ToString() - + " evaluates to zero)") - else - Result.Success(Number(l.value % r.value)) - case _ => Result.Failure("Unsupported operator: " + op) - } - else - Result.Success(Binary(op, l, r)) - case Unknown(0) => Result.Success(Number(1)) - case Unknown(_) => Result.Success(this) - } - } - static function InfixBuilder(): (Expr, (string, Expr)) -> Expr - { - (left: Expr, right: (string, Expr)) => Binary(right.0, left, right.1) - } - function ToString(): string { - match this - case Number(x) => P.intToString(x) - case Binary(op, left, right) => - "(" - + left.ToString() + op + right.ToString() - + ")" - case Unknown(power) => - if power == 1 then "x" else if power == 0 then "1" else - "x^" + P.intToString(power) - } - } - - method Main(args: seq) { - if |args| <= 1 { - print "Please provide a polynomial to parse as argument\n"; - return; - } - for i := 1 to |args| { - var input := args[i]; - match parser.apply(input) { - case Success(result, remaining) => - print "Polynomial:", result.ToString(), "\n"; - match result.Simplify() { - case Success(x) => - print "Simplified:", x.ToString(), "\n"; - case Failure(message) => - print message; - } - case failure => - P.PrintFailure(input, failure); - } - print "\n"; - } - } -} \ No newline at end of file diff --git a/src/Parsers/parsers.dfy b/src/Parsers/parsers.dfy deleted file mode 100644 index b9caa7e5..00000000 --- a/src/Parsers/parsers.dfy +++ /dev/null @@ -1,615 +0,0 @@ -include "../Wrappers.dfy" - -abstract module Parsers -// Functional parsers consuming sequences seq from the left to the right. -// For parsers over strings, please refer to the StringParsers module -{ - import Wrappers - - export - provides C, // The character type - Wrappers, // Imported module - Valid, - Succeed, - Epsilon, - Fail, - EndOfString, - Bind, - BindSucceeds, - BindResult, - Map, - Not, - And, - Or, - OrSeq, - Lookahead, - ?, - If, - Maybe, - ConcatMap, - Concat, - ConcatL, - ConcatR, - RepSeq, - Rep, - Recursive, - RecursiveMap, - Any, - Many, - Debug, - intToString, - digitToInt, - stringToInt, - ParseResult.IsFailure, - ParseResult.PropagateFailure, - ParseResult.Extract - reveals - Parser, - ParserSelector, - Option, // From Wrappers - FailureLevel, - ParseResult, - FailureData, - RecursiveDef - - export All reveals * - - type C(!new, ==) - // The character of the sequence being parsed - - type Parser<+R> = seq -> ParseResult - // A parser is a total function from a position to a parse result - // Because it returns a delta pos, it cannot return a position negative from the origing - // If the parsing is out of context, it will return a failure. - - type ParserSelector = string -> Parser - // A parser selector is a function that, given a name that exists, - // returns a parser associated to this name - - type Option = Wrappers.Option - // The common option type, synonym definition - - datatype FailureData = - FailureData( - message: string, - remaining: seq, - next: Option) - // A Parser failure can mention several places - // (e.g. which could have continued to parse) - { - function Concat(other: FailureData): FailureData - // Concatenates two failure datas, the first staying in the front - { - if next == Option.None then - this.(next := Option.Some(other)) - else - FailureData(message, remaining, Option.Some(next.value.Concat(other))) - } - } - - datatype FailureLevel = - // Failure level for parse results. - // A Fatal error results in a unique FailurePosition - // and will be propagated to the top ASAP - // A Recoverable error can typically be processed. - // Comittedness of the parser only depends if the .Remaining() - // of the parse result has moved since the input was provided. - Fatal | Recoverable - - datatype ParseResult<+R> = - // ParseResult is the type of what a parser taking a seq would return - | Failure(level: FailureLevel, data: FailureData) - // Returned if a parser failed. - | Success(result: R, remaining: seq) - // Returned if a parser succeeds, with the increment in the position - { - function Remaining(): seq - // If Remaining() is the same as the input, the parser is "uncommitted", - // which means combinators like Or and RepSeq can try alternatives - { - if Success? then remaining else data.remaining - } - - predicate IsFailure() { - Failure? - } - - predicate IsFatalFailure() { - Failure? && level == Fatal - } - - predicate IsFatal() - requires IsFailure() - { - level == Fatal - } - - function PropagateFailure(): ParseResult - requires IsFailure() - { - Failure(level, data) - } - - function Extract(): (R, seq) - requires !IsFailure() - { - (result, remaining) - } - - function Map(f: R -> R'): ParseResult - // Transforms the result of a successful parse result - { - match this - case Success(result, remaining) => - Success(f(result), remaining) - case Failure(level, data) => - Failure(level, data) - } - - function MapRecoverableError( - f: FailureData -> FailureData - ): ParseResult - // If the result is a recoverable error, - // let the function process it - { - match this - case Failure(Recoverable, data) => - Failure(Recoverable, f(data)) - case _ => this - } - - predicate NeedsAlternative(input: seq) - // Returns true if the parser result is a - // - A failure - // - Is recoverable - // - Did not consume any input (not-committed) - { - Failure? && level == Recoverable && input == Remaining() - } - } - - predicate IsRemaining(input: seq, remaining: seq) - // True if remaining is a suffix of the input - { - && |remaining| <= |input| - && input[|input|-|remaining|..] == remaining - } - - opaque ghost predicate Valid(underlying: Parser) - // A parser is valid iff for any input, it never returns a fatal error - // and always returns a suffix of its input - { - forall input: seq :: - && (underlying(input).Failure? ==> underlying(input).level == Recoverable) - && IsRemaining(input, underlying(input).Remaining()) - } - - // ######################################## - // Parser combinators. - // The following functions make it possible to create and compose parsers - // All these combinators provide Valid() parsers if their inputs are Valid() too - // ######################################## - - opaque function Succeed(result: R): (p: Parser) - // A parser that does not consume any input and returns the given value - { - (input: seq) => Success(result, input) - } - - opaque function Epsilon(): (p: Parser<()>) - // A parser that always succeeds, consumes nothing and returns () - { - Succeed(()) - } - - opaque function Fail(message: string, level: FailureLevel := Recoverable): Parser - // A parser that does not consume any input and returns the given failure - { - (input: seq) => Failure(level, FailureData(message, input, Option.None)) - } - - opaque function EndOfString(): Parser<()> - // A parser that fails if the string has not been entirely consumed - { - (input: seq) => - if |input| == 0 then Success((), input) - else Failure(Recoverable, FailureData("expected end of string", input, Option.None)) - } - - opaque function Bind( - left: Parser, - right: L -> Parser - ) : (p: Parser) - // Fails if the left parser fails. - // If the left parser succeeds, provides its result and the remaining sequence - // to the right parser generator. - // For a more general version, look at BindSucceeds - { - (input: seq) - => - var (leftResult, remaining) :- left(input); - right(leftResult)(remaining) - } - - opaque function BindSucceeds( - left: Parser, - right: (L, seq) -> Parser - ) : (p: Parser) - // Fails if the left parser fails. - // If the left parser succeeds, provides its result and its remaining - // to the right parser generator and returns its result applied to the remaining - // For a more general version, look at BindResult - { - (input: seq) - => - var (leftResult, remaining) :- left(input); - right(leftResult, remaining)(remaining) - } - - opaque function BindResult( - left: Parser, - right: (ParseResult, seq) -> Parser - ) : (p: Parser) - // Given a left parser and a parser generator based on the output - // of the left parser, - // returns the result of the right parser applied on the original input - { - (input: seq) - => - right(left(input), input)(input) - } - - opaque function Map(underlying: Parser, mappingFunc: R -> U) - : (p: Parser) - // A parser combinator that makes it possible to transform the result of a parser in another one - // The mapping function can be partial - // ensures forall pos | MapSpec(size, underlying, mappingFunc, pos) :: - // p.requires(pos) - { - (input: seq) => - var (result, remaining) :- underlying(input); - var u := mappingFunc(result); - Success(u, remaining) - } - - opaque function Not(underlying: Parser): Parser<()> - // Returns a parser that succeeds if the underlying parser fails - // and vice-versa. The result does not consume any input - { - (input: seq) => - var l := underlying(input); - if l.IsFailure() then - if l.IsFatal() then l.PropagateFailure() - else Success((), input) - else Failure(Recoverable, FailureData("not failed", input, Option.None)) - } - - opaque function And( - left: Parser, - right: Parser - ) : (p: Parser<(L, R)>) - // Make the two parsers parse the same string and, if both suceed, - // returns a pair of the two results, with the remaining of the right - { - (input: seq) => - var (l, remainingLeft) :- left(input); - var (r, remainingRight) :- right(input); - Success((l, r), remainingRight) - } - - opaque function Or( - left: Parser, - right: Parser - ) : (p: Parser) - // left parses the string. If left succeeds, returns - // if left fails, two cases - // - If the error is recoverable and the parser did not consume input, - // then return what right returns - // - Otherwise return both errors - { - (input: seq) => - var p := left(input); - if !p.NeedsAlternative(input) then p else - var p2 := right(input); - if !p2.NeedsAlternative(input) then p2 else - p2.MapRecoverableError( - dataRight => - p.data.Concat(dataRight)) - } - - opaque function OrSeq( - alternatives: seq> - ): Parser - { - if |alternatives| == 0 then Fail("no alternatives") else - if |alternatives| == 1 then alternatives[0] - else - Or(alternatives[0], OrSeq(alternatives[1..])) - } - - opaque function Lookahead(underlying: Parser): (p: Parser) - // If the underlying parser succeeds, - // returns its result without committing the input - // if the underlying parser fails, - // - If the failure is fatal, returns it as-it - // - If the failure is recoverable, returns it without comitting the input - { - (input: seq) => - var p := underlying(input); - if p.IsFailure() then - if p.IsFatal() then - p - else - p.(data := FailureData(p.data.message, input, Option.None)) - else - p.(remaining := input) - } - - opaque function ?(underlying: Parser): (p: Parser) - // Like Lookahead, except that if the parser succeeds, - // it keeps the committedness of the input. - // Identical to Lookahead, if the underlying parser fails, - // - If the failure is fatal, returns it as-it - // - If the failure is recoverable, returns it without comitting the input - { - (input: seq) => - var p := underlying(input); - if p.IsFailure() then - if p.IsFatal() then - p - else - p.(data := FailureData(p.data.message, input, Option.None)) - else - p - } - - opaque function If( - condition: Parser, - succeed: Parser - ) : (p: Parser) - // If the condifition fails, returns a non-committing failure - // Suitable to use in Or parsers - { - Bind(Lookahead(condition), (l: L) => succeed) - } - - opaque function Maybe(underlying: Parser): Parser> - // Transforms a recoverable failure into None, - // and wraps a success into Some(...) - { - (input: seq) => - var u := underlying(input); - if u.IsFatalFailure() then u.PropagateFailure() - else - if u.Success? then u.Map(result => Option.Some(result)) - else Success(Option.None, input) - } - - opaque function ConcatMap( - left: Parser, - right: Parser, - mapper: (L, R) -> T - ) : (p: Parser) - // Apply two consecutive parsers consecutively - // If both succeed, apply the mapper to the result and return it - { - (input: seq) - => - var (l, remaining) :- left(input); - var (r, remaining2) :- right(remaining); - Success(mapper(l, r), remaining2) - } - - opaque function Concat( - left: Parser, - right: Parser - ) : (p: Parser<(L, R)>) - // Apply two consecutive parsers consecutively - // If both succeed, return the pair of the two results - { - (input: seq) => - var (l, remaining) :- left(input); - var (r, remaining2) :- right(remaining); - Success((l, r), remaining2) - } - - opaque function ConcatR( - left: Parser, - right: Parser - ) : (p: Parser) - // Return only the result of the right parser if the two parsers match - { - ConcatMap(left, right, (l, r) => r) - } - - opaque function ConcatL( - left: Parser, - right: Parser - ) : (p: Parser) - // Return only the result of the right parser if the two parsers match - { - ConcatMap(left, right, (l, r) => l) - } - - opaque function RepSeq( - underlying: Parser - ): Parser> - // Repeats the underlying parser until the first failure - // that accepts alternatives, and returns the underlying sequence - { - Rep(underlying, (result: seq, r: R) => result + [r], []) - } - - opaque function Rep( - underlying: Parser, - combine: (A, B) -> A, - acc: A - ): Parser - // Repeats the underlying parser until the first failure - // that accepts alternatives, combining results to an accumulator - // and return the final accumulator - { - (input: seq) => Rep_(underlying, combine, acc, input) - } - - opaque function {:tailrecursion true} Rep_( - underlying: Parser, - combine: (A, B) -> A, - acc: A, - input: seq - ): (p: ParseResult) - decreases |input| - // RepSeq the underlying parser over the input until a recoverable failure happens - // and returns the accumulated results - { - match underlying(input) - case Success(result, remaining) => - if |remaining| >= |input| then Success(acc, input) else - Rep_(underlying, combine, combine(acc, result), remaining) - case failure => - if failure.NeedsAlternative(input) then - Success(acc, input) - else - failure.PropagateFailure() - } - - opaque function Recursive( - underlying: Parser -> Parser - ): (p: Parser) - // Given a function that requires a parser to return a parser, - // provide the result of this parser to that function itself. - // Careful: This function is not tail-recursive and will consume stack. - // Prefer using Rep() or RepSeq() for sequences - { - (input: seq) => Recursive_(underlying, input) - } - - opaque function Recursive_( - underlying: Parser -> Parser, - input: seq - ): (p: ParseResult) - // Implementation for Recursive() - decreases |input| - { - var callback: Parser := - (remaining: seq) => - if |remaining| < |input| then - Recursive_(underlying, remaining) - else if |remaining| == |input| then - Failure(Recoverable, FailureData("no progress", remaining, Option.None)) - else - Failure(Fatal, FailureData("fixpoint called with an increasing remaining sequence", remaining, Option.None)); - underlying(callback)(input) - } - - opaque function RecursiveMap( - underlying: map>, - fun: string): (p: Parser) - // Given a map of name := recursive definitions, - // provide the result of this parser to the recursive definitions - // and set 'fun' as the initial parser. - // Careful: This function is not tail-recursive and will consume stack - { - (input: seq) => RecursiveMap_(underlying, fun, input) - } - - datatype RecursiveDef = RecursiveDef( - order: nat, - definition: ParserSelector -> Parser - ) // The order must be decreasing every time the function steps in - // But it can jump to a bigger order if the input is consumed - - opaque function RecursiveMap_( - underlying: map>, - fun: string, - input: seq - ): (p: ParseResult) - // Implementation for RecursiveMap() - decreases |input|, if fun in underlying then underlying[fun].order else 0 - { - if fun !in underlying then Failure(Fatal, FailureData("parser '"+fun+"' not found", input, Option.None)) else - var RecursiveDef(orderFun, definitionFun) := underlying[fun]; - var callback: ParserSelector - := - (fun': string) => - (var p : Parser := - if fun' !in underlying.Keys then - Fail(fun' + " not defined", Fatal) - else - var RecursiveDef(orderFun', definitionFun') := underlying[fun']; - (remaining: seq) => - if |remaining| < |input| || (|remaining| == |input| && orderFun' < orderFun) then - RecursiveMap_(underlying, fun', remaining) - else if |remaining| == |input| then - Failure(Recoverable, FailureData("non-progressing recursive call requires that order of '" - +fun'+"' ("+intToString(orderFun')+") is lower than the order of '"+fun+"' ("+intToString(orderFun)+")", remaining, Option.None)) - else - Failure(Fatal, FailureData("parser did not return a suffix of the input", remaining, Option.None)) - ; p); - definitionFun(callback)(input) - } - - opaque function Any(test: C -> bool, name: string): (p: Parser) - // A parser that returns the current char if it passes the test - // Returns a recoverable error based on the name otherwise - { - (input: seq) => - if 0 < |input| && test(input[0]) then Success(input[0], input[1..]) - else Failure(Recoverable, - FailureData("expected a "+name, input, Option.None)) - } - - opaque function Many(test: C -> bool, name: string): (p: Parser>) - { - Bind(Any(test, name), (c: C) => - Rep(Any(test, name), (s: seq, c': C) => s + [c'], [c])) - } - - function Debug_(message: string): string { - message - } by method { - print message, "\n"; - return message; - } - - opaque function Debug(msg: string, other: Parser): (p: Parser) - // A parser that, when invoked, will print a message before applying its underlying parser - // and also afterwards - { - (input: seq) => - var _ := Debug_(msg + "(before)"); - var p := other(input); - var _ := Debug_(msg + "(after)"); - p - } - - opaque function intToString(n: int): string - // Converts an integer to a string - decreases if n < 0 then 1 - n else n - { - if n < 0 then "-" + intToString(-n) else - match n - case 0 => "0" case 1 => "1" case 2 => "2" case 3 => "3" case 4 => "4" - case 5 => "5" case 6 => "6" case 7 => "7" case 8 => "8" case 9 => "9" - case _ => intToString(n / 10) + intToString(n % 10) - } - - opaque function digitToInt(c: char): int { - match c - case '0' => 0 case '1' => 1 case '2' => 2 case '3' => 3 case '4' => 4 - case '5' => 5 case '6' => 6 case '7' => 7 case '8' => 8 case '9' => 9 - case _ => -1 - } - - opaque function stringToInt(s: string): int - // Converts a string to a string - decreases |s| - { - if |s| == 0 then 0 else - if |s| == 1 then digitToInt(s[0]) - else if s[0] == '-' then - 0 - stringToInt(s[1..]) - else - stringToInt(s[0..|s|-1])*10 + stringToInt(s[|s|-1..|s|]) - } -} \ No newline at end of file diff --git a/src/Parsers/parsersBuilders.dfy b/src/Parsers/parsersBuilders.dfy deleted file mode 100644 index fdd5023e..00000000 --- a/src/Parsers/parsersBuilders.dfy +++ /dev/null @@ -1,134 +0,0 @@ -include "parsers.dfy" - -// Nice wanna-to-be DSL to build parsers to avoid too much parenthesis nesting -// B(p) returns a parser builder from a normal parser. -// B1.o_I(B2) will parse both but return the result of B2 -// B1.I_o(B2) will parse both but return the result of B1 -// B.M(f) will map the result of the parser builder by f if succeeded -// B1.O(B2) will either parse B1, or B2 if B1 fails with Recoverable -// FirstOf([B1, B2, B3]) -// will parse with B1, but if B1 fails with Recoverable, -// it will parse with B2, but if B2 fails with Recoverable, -// it will parse with B3 -// R(v) returns a parser builder that returns immediately v -// -// There are more parser builders in the trait Engine, when their spec depends on -// a predetermined input, e.g. to tests for constant strings - -abstract module ParserBuilders { - import P: Parsers - export - provides P - provides O - provides Ok - provides Fail - provides Rec - provides B.e_I - provides B.I_e - provides B.I_I - provides B.M - provides B.Maybe - provides B.Bind - provides B.Rep - provides End - provides Any, Many - reveals B - reveals RecDef, FailureLevel, RecSel - - type FailureLevel = P.FailureLevel - type RecSel = string -> B - - // Wrap the constructor in a class where the size is constant so that users - // don'result need to provide it. - datatype B = B(apply: P.Parser) - { - function Maybe(): B> { - B(P.Maybe(apply)) - } - function e_I(other: B): (p: B) - // Excludes the left, includes the right - { - B(P.ConcatR(apply, other.apply)) - } - function I_e(other: B): (p: B) - // Includes the left, excludes the right - { - B(P.ConcatL(apply, other.apply)) - } - function I_I(other: B): (p: B<(R, U)>) - // Includes the left, excludes the right - { - B(P.Concat(apply, other.apply)) - } - function M(mappingFunc: R -> U): (p: B) - // Maps the result - { - B(P.Map(apply, mappingFunc)) - } - function Bind(other: R -> B): (p: B) - { - B(P.Bind(apply, (result: R) => other(result).apply)) - } - - function Rep(init: A, combine: (A, R) -> A): (p: B) - { - B(P.Rep(apply, combine, init)) - } - } - - function Ok(result: R): (p: B) - { - B(P.Succeed(result)) - } - - function Fail(message: string, level: FailureLevel := FailureLevel.Recoverable): (p: B) - { - B(P.Fail(message, level)) - } - - function O(alternatives: seq>): B - // Declares a set of alternatives as a single list - { - if |alternatives| == 0 then Fail("no alternative") else - if |alternatives| == 1 then alternatives[0] - else - B(P.Or(alternatives[0].apply, O(alternatives[1..]).apply)) - } - - function End(): B<()> - { - B(P.EndOfString()) - } - - function Any(test: P.C -> bool, name: string): B - { - B(P.Any(test, name)) - } - - function Many(test: P.C -> bool, name: string): B> - { - B(P.Many(test, name)) - } - - - datatype RecDef = RecDef( - order: nat, - definition: RecSel -> B) - - opaque function Rec( - underlying: map>, - fun: string): (p: B) - { - B(P.RecursiveMap( - map k <- underlying :: k := - P.RecursiveDef( - underlying[k].order, - (selector: P.ParserSelector) => - underlying[k].definition( - (name: string) => - B(selector(name)) - ).apply), - fun - )) - } -} diff --git a/src/Parsers/parsersDisplayers.dfy b/src/Parsers/parsersDisplayers.dfy deleted file mode 100644 index b1838a17..00000000 --- a/src/Parsers/parsersDisplayers.dfy +++ /dev/null @@ -1,46 +0,0 @@ -include "parsers.dfy" - -// From these parsers, we can create displayers -// and prove the roundtrip displayer / parser if we wanted to -abstract module ParsersDiplayers { - import Parsers`All - - type Parser = Parsers.Parser - type C = Parsers.C - - type Displayer<-R> = (R, seq) -> seq - - function Concat( - left: Displayer, - right: Displayer - ): Displayer<(A, B)> { - (ab: (A, B), remaining: seq) => - var remaining2 := right(ab.1, remaining); - var remaining3 := left(ab.0, remaining2); - remaining3 - } - - ghost predicate Roundtrip(parse: Parser, display: Displayer) - // The parser and the displayer are dual to each other - // means that if we parse after printing, we get the same result - { - forall a: A, remaining: seq :: - parse(display(a, remaining)) == Parsers.Success(a, remaining) - } - - lemma {:rlimit 1000} ConcatRoundtrip( - pA: Parser, ppA: Displayer, - pB: Parser, ppB: Displayer - ) - requires Roundtrip(pA, ppA) && Roundtrip(pB, ppB) - ensures Roundtrip(Parsers.Concat(pA, pB), Concat(ppA, ppB)) - { - reveal Parsers.Concat(); - var p := Parsers.Concat(pA, pB); - var d := Concat(ppA, ppB); - forall ab: (A, B), remaining: seq ensures - p(d(ab, remaining)) == Parsers.Success(ab, remaining) - { - } - } -} \ No newline at end of file diff --git a/src/Parsers/parsersTests.dfy b/src/Parsers/parsersTests.dfy deleted file mode 100644 index 4fc98360..00000000 --- a/src/Parsers/parsersTests.dfy +++ /dev/null @@ -1,410 +0,0 @@ -include "parsers.dfy" - -abstract module ParserTests refines Parsers { - lemma AboutSucceed(result: R, input: seq) - ensures - var p := Succeed(result); - && p(input).Success? - && p(input).remaining == input - { reveal Succeed(); } - - lemma AboutFail_(message: string, level: FailureLevel, input: seq) - ensures - var p := Fail(message, level)(input); - && p.Failure? - && p.data == FailureData(message, input, Option.None) - && p.level == level - { - reveal Fail(); - } - - lemma AboutFail_2(message: string, input: seq) - ensures - var p := Fail(message)(input); - && p.Failure? - && p.level == Recoverable - && p.data == FailureData(message, input, Option.None) - { - reveal Fail(); - } - - lemma AboutBind_( - left: Parser, - right: (L, seq) -> Parser, - input: seq - ) - ensures - var p := BindSucceeds(left, right)(input); - && var leftResult := left(input); - && !leftResult.IsFailure() - ==> var leftValues := left(input).Extract(); - && var rightResult := right(leftValues.0, leftValues.1)(leftValues.1); - && !rightResult.IsFailure() - ==> && !p.IsFailure() - && p.remaining == rightResult.remaining - && p.result == rightResult.result - { - reveal BindSucceeds(); - } - - lemma AboutMap_(underlying: Parser, mappingFunc: R -> U, input: seq) - ensures var p := Map(underlying, mappingFunc); - && (underlying(input).Success? <==> p(input).Success?) - && (p(input).Success? ==> - && p(input).remaining == underlying(input).remaining - && p(input).result == mappingFunc(underlying(input).result)) - { - reveal Map(); - reveal BindSucceeds(); - reveal Succeed(); - } - - function BindMapCallback(mappingFunc: R -> U): - (R, seq) -> Parser - { - (result: R, remaining: seq) => Succeed(mappingFunc(result)) - } - - lemma AboutMap_Bind_(underlying: Parser, mappingFunc: R -> U, input: seq) - ensures Map(underlying, mappingFunc)(input) - == BindSucceeds(underlying, BindMapCallback(mappingFunc))(input) - { - reveal Map(); - reveal BindSucceeds(); - reveal Succeed(); - } - - lemma AboutConcat( - left: Parser, - right: Parser, - input: seq) - ensures var p := Concat(left, right); - && (p(input).Success? ==> - && left(input).Success? - && p(input).result.0 == left(input).result - && var input2 := left(input).remaining; - && right(input2).Success? - && p(input).result.1 == right(input2).result - && p(input).remaining == right(input2).remaining) - { - reveal Concat(); - reveal ConcatMap(); - } - - function BindConcatCallback(right: Parser): (L, seq) -> Parser<(L, R)> - { - (l: L, remaining: seq) => - Map(right, (r: R) => (l, r)) - } - - lemma AboutConcatBind_( - left: Parser, - right: Parser, - input: seq) - ensures Concat(left, right)(input) == BindSucceeds(left, BindConcatCallback(right))(input) - { - reveal Concat(); - reveal BindSucceeds(); - reveal Succeed(); - reveal Map(); - reveal ConcatMap(); - } - - lemma AboutConcatR( - left: Parser, - right: Parser, - input: seq) - ensures var p := ConcatR(left, right); - && (p(input).Success? ==> - && left(input).Success? - && var input2 := left(input).remaining; - && right(input2).Success? - && p(input).result == right(input2).result - && p(input).remaining == right(input2).remaining) - { - reveal ConcatR(); - reveal ConcatMap(); - } - - function first(): ((L, R)) -> L { - (lr: (L, R)) => lr.0 - } - function second(): ((L, R)) -> R { - (lr: (L, R)) => lr.1 - } - lemma AboutConcatConcatR( - left: Parser, - right: Parser, - input: seq) - ensures Map(Concat(left, right), second())(input) == ConcatR(left, right)(input) - { - reveal Concat(); - reveal Succeed(); - reveal ConcatR(); - reveal Map(); - reveal ConcatMap(); - } - - - lemma AboutConcatL( - left: Parser, - right: Parser, - input: seq) - ensures var p := ConcatL(left, right); - && (p(input).Success? ==> - && left(input).Success? - && var input2 := left(input).remaining; - && right(input2).Success? - && p(input).result == left(input).result - && p(input).remaining == right(input2).remaining) - { - reveal ConcatL(); - reveal ConcatMap(); - } - lemma AboutConcatConcatL( - left: Parser, - right: Parser, - input: seq) - ensures Map(Concat(left, right), first())(input) == ConcatL(left, right)(input) - { - reveal Concat(); - reveal Succeed(); - reveal ConcatL(); - reveal Map(); - reveal ConcatMap(); - } - - predicate AboutRepIncreasesPosIfUnderlyingSucceedsAtLeastOnceEnsures( - underlying: Parser, - acc: seq, - input: seq - ) - { - var result := RepSeq(underlying)(input); - && result.Success? - && |acc| <= |result.result| - && (underlying(input).Success? && |underlying(input).remaining| < |input| - ==> - (|acc| < |result.result| && |result.remaining| < |input|)) - } - - predicate AboutFix_Ensures( - underlying: Parser -> Parser, - input: seq) - { - var p := Recursive_(underlying, input); - p.Success? ==> IsRemaining(input, p.remaining) - } - - lemma {:vcs_split_on_every_assert} AboutFix_( - underlying: Parser -> Parser, - input: seq) - requires - forall callback: Parser, u: seq - | underlying(callback)(u).Success? - :: IsRemaining(input, underlying(callback)(input).Remaining()) - ensures AboutFix_Ensures(underlying, input) - { - reveal Recursive_(); - } - - - predicate AboutRecursiveMap_Ensures( - underlying: map>, - fun: string, - input: seq - ) { - var p := RecursiveMap_(underlying, fun, input); - && (p.Success? ==> IsRemaining(input, p.remaining)) - } - - - lemma Succeed_NonCrashing(result: R) - ensures Valid(Succeed(result)) - { reveal Valid(), Succeed(); } - - lemma Succeed_NonCrashingAuto() - ensures forall result: R :: Valid(Succeed(result)) - { reveal Valid(), Succeed(); } - - lemma Epsilon_NonCrashing() - ensures Valid(Epsilon()) - { reveal Valid(), Epsilon(); Succeed_NonCrashing(()); } - - lemma AboutEpsilon_(input: seq) - ensures - var p := Epsilon(); - && p(input).Success? - && p(input).remaining == input - { - reveal Epsilon(); - reveal Succeed(); - } - - lemma Fail_NonCrashing(message: string) - ensures Valid(Fail(message, Recoverable)) - { reveal Fail(); reveal Valid(); } - - lemma Fail_NonCrashingAuto() - ensures forall message :: Valid(Fail(message, Recoverable)) - { reveal Fail(); reveal Valid(); } - - ghost predicate BindRightNonCrashing(right: (L, seq) -> Parser) { - forall l: L, input: seq :: Valid(right(l, input)) - } - - lemma Bind_NonCrashing( - left: Parser, - right: (L, seq) -> Parser - ) - requires Valid(left) - requires BindRightNonCrashing(right) - ensures Valid(BindSucceeds(left, right)) - { - reveal BindSucceeds(), Valid(); - var p := BindSucceeds(left, right); - forall input: seq ensures - && (p(input).Failure? ==> p(input).level == Recoverable) - && IsRemaining(input, p(input).Remaining()) - { - - } - } - - ghost predicate Bind_NonCrashingRight(left: Parser) - requires Valid(left) - { - forall right: (L, seq) -> Parser | BindRightNonCrashing(right) :: - Valid(BindSucceeds(left, right)) - } - - lemma Bind_NonCrashingAuto() - ensures forall left: Parser | Valid(left) :: - Bind_NonCrashingRight(left) - { - forall left: Parser | Valid(left), - right: (L, seq) -> Parser | BindRightNonCrashing(right) - ensures - Valid(BindSucceeds(left, right)) - { - Bind_NonCrashing(left, right); - } - } - - lemma intToStringThenStringToIntIdem(n: int) - decreases if n < 0 then 1 - n else n - ensures 0 <= n ==> 1 <= |intToString(n)| && intToString(n)[0] != '-' - ensures stringToInt(intToString(n)) == n - { // Proof is automatic - reveal intToString(), stringToInt(), digitToInt(); - if n < 0 { - calc { - stringToInt(intToString(n)); - stringToInt("-" + intToString(-n)); - 0 - stringToInt(intToString(-n)); - { intToStringThenStringToIntIdem(-n); } - n; - } - } else if 0 <= n <= 9 { - assert stringToInt(intToString(n)) == n; - } else { - assert intToString(n) == intToString(n / 10) + intToString(n % 10); - var s := intToString(n); - } - } - opaque predicate IsStringInt(s: string): (b: bool) - ensures b ==> |s| > 0 - { - |s| > 0 && - if s[0] == '-' then - |s| > 1 && s[1] != '0' && - (forall i | 1 <= i < |s| :: s[i] in "0123456789") - else - (|s| > 1 ==> s[0] != '0') && - (forall i | 0 <= i < |s| :: s[i] in "0123456789") - } - - lemma stringToIntNonnegative(s: string) - requires IsStringInt(s) - requires s[0] != '-' - decreases |s| - ensures 0 <= stringToInt(s) - ensures s != "0" ==> 0 < stringToInt(s) - ensures |s| > 1 ==> 10 <= stringToInt(s) - { - if |s| == 0 { - - } else if |s| == 1 { - reveal digitToInt(), stringToInt(), IsStringInt(); - match s[0] - case '0' => case '1' => case '2' => case '3' => case '4' => - case '5' => case '6' => case '7' => case '8' => case '9' => - case _ => - } else if s[0] == '-' { - } else { - assert !(|s| == 0 || |s| == 1 || s[0] == '-'); - reveal stringToInt(); - assert stringToInt(s) == stringToInt(s[0..|s|-1])*10 + stringToInt(s[|s|-1..|s|]); - assert IsStringInt(s[0..|s|-1]) by { - reveal IsStringInt(); - } - stringToIntNonnegative(s[..|s|-1]); - var tail := s[|s|-1..|s|]; - assert IsStringInt(tail) && tail[0] != '-' by { - reveal IsStringInt(); - } - stringToIntNonnegative(tail); - reveal IsStringInt(); - assert |s| > 1 ==> 10 <= stringToInt(s); - } - } - - lemma stringToIntThenIntToStringIdem(s: string) - requires IsStringInt(s) - decreases |s| - ensures s[0] != '-' ==> 0 <= stringToInt(s) - ensures |s| == 1 ==> 0 <= stringToInt(s) <= 9 - ensures intToString(stringToInt(s)) == s - { - assert |s| > 0; - if 1 <= |s| && s[0] == '-' { - reveal intToString(), stringToInt(), IsStringInt(); - assert forall i | 1 <= i < |s| :: s[i] in "0123456789"; - calc { - intToString(stringToInt(s)); - intToString(0 - stringToInt(s[1..])); - } - } else if |s| == 1 { - reveal intToString(), stringToInt(), IsStringInt(), digitToInt(); - calc { - intToString(stringToInt(s)); - s; - } - } else { - var n := stringToInt(s); - stringToIntNonnegative(s); - var init := s[..|s|-1]; - var last := s[|s|-1..|s|]; - var q := stringToInt(init); - var r := stringToInt(last); - assert IsStringInt(init) by { reveal IsStringInt(); } - assert IsStringInt(last) by { reveal IsStringInt(); } - stringToIntThenIntToStringIdem(init); - stringToIntThenIntToStringIdem(last); - assert stringToInt(s) == - stringToInt(s[0..|s|-1])*10 + stringToInt(s[|s|-1..|s|]) by { - reveal stringToInt(); - } - assert n == q * 10 + r; - calc { - intToString(n); - { reveal intToString(); - assert !(n < 0); - assert n != 0; - } - intToString(n / 10) + intToString(n % 10); - s; - } - } - } -} diff --git a/src/Parsers/stringParsers.dfy b/src/Parsers/stringParsers.dfy deleted file mode 100644 index 962d7488..00000000 --- a/src/Parsers/stringParsers.dfy +++ /dev/null @@ -1,145 +0,0 @@ -include "parsers.dfy" - -module StringParsers refines Parsers { - export StringParsers extends Parsers - provides - Char, - Digit, - DigitNumber, - Nat, - Int, - String, - ExtractLineCol, - PrintFailure, - Wrappers - reveals C - - type C = char - - // ################################## - // String-specific parser combinators - // ################################## - - opaque function Char(expectedChar: char): (p: Parser) - // A parser that tests if the current char is the given expected char - { - Any((c: char) => c == expectedChar, [expectedChar]) - } - - opaque function Digit(): (p: Parser) - // A parser that tests if the current char is a digit and returns it - { - Any(c => c in "0123456789", "digit") - } - - opaque function DigitNumber(): (p: Parser) - // A parser that returns the current char as a number if it is one - { - Map(Digit(), (c: char) => - var d := digitToInt(c); - var n: nat := if d >= 0 then d else 0; - n - ) - } - - opaque function Nat(): (p: Parser) - // A parser that parses a natural number - { - Bind(DigitNumber(), - (result: nat) => - Rep(DigitNumber(), - (previous: nat, c: nat) => - var r: nat := previous * 10 + c; r, - result - ) - ) - } - - opaque function Int(): (p: Parser) - // A parser that parses a integer, possibly negative - { - Bind(Maybe(Char('-')), - (minusSign: Option) => - Map(Nat(), (result: nat) => if minusSign.Some? then 0-result else result)) - } - - opaque function String(expected: string): (p: Parser) - // A parser that succeeds only if the input starts with the given string - { - (input: string) => - if |expected| <= |input| && input[0..|expected|] == expected then Success(expected, input[|expected|..]) - else Failure(Recoverable, FailureData("expected '"+expected+"'", input, Option.None)) - } - - // ######################## - // Error handling utilities - // ######################## - - function repeat_(str: string, n: nat): (r: string) - // Repeats the given string n times - ensures |r| == |str| * n - { - if n == 0 then "" - else str + repeat_(str, n-1) - } - - method ExtractLineCol(input: string, pos: nat) - returns (lineNumber: nat, lineStr: string, colNumber: nat) - // Returns the line number, the extracted line, and the column number - // corresponding to a given position in the given input - { - lineNumber := 1; - var startLinePos: nat := 0; - colNumber := 0; - var i := 0; - while i < |input| && i != pos - invariant 0 <= startLinePos <= i <= |input| - { - colNumber := colNumber + 1; - if input[i] == '\r' && i + 1 < |input| && input[i+1] == '\n' { - lineNumber := lineNumber + 1; - colNumber := 0; - i := i + 1; - startLinePos := i + 1; - } else if input[i] in "\r\n" { - lineNumber := lineNumber + 1; - colNumber := 0; - startLinePos := i + 1; - } - i := i + 1; - } - while i < |input| && input[i] !in "\r\n" - invariant startLinePos <= i <= |input| - { - i := i + 1; - } - lineStr := input[startLinePos..i]; - } - - method PrintFailure(input: string, result: ParseResult, printPos: int := -1) - // Util to print the line, the column, and all the error messages - // associated to a given parse failure - requires result.Failure? - decreases result.data - { - if printPos == -1 { - print if result.level == Fatal then "Fatal error" else "Error", ":\n"; - } - var pos: int := |input| - |result.data.remaining|; // Need the parser to be Valid() - if pos < 0 { // Could be proved false if parser is Valid() - pos := 0; - } - if printPos != pos { - var line, lineStr, col := ExtractLineCol(input, pos); - print line, ": ", lineStr, "\n"; - print repeat_(" ", col + 2 + |intToString(line)|), "^","\n"; - } - print result.data.message; - if result.data.next.Some? { - print ", or\n"; - PrintFailure(input, Failure(result.level, result.data.next.value), pos); - } else { - print "\n"; - } - } -} \ No newline at end of file diff --git a/src/Parsers/stringParsersBuilders.dfy b/src/Parsers/stringParsersBuilders.dfy deleted file mode 100644 index e6cbb8ab..00000000 --- a/src/Parsers/stringParsersBuilders.dfy +++ /dev/null @@ -1,15 +0,0 @@ -include "stringParsers.dfy" -include "parsersBuilders.dfy" - -module StringParsersBuilders refines ParserBuilders { - import P = StringParsers - export StringParsersBuilders extends ParserBuilders - provides String, Int - - function String(s: string): B { - B(P.String(s)) - } - function Int(): B { - B(P.Int()) - } -} \ No newline at end of file From 03cd2f250452707cac9c1e05c37ab5b48078c217 Mon Sep 17 00:00:00 2001 From: Mikael Mayer Date: Wed, 15 Nov 2023 09:15:21 -0600 Subject: [PATCH 14/22] Renaming --- src/Parsers/Parsers.dfy | 615 ++++++++++++++++++ src/Parsers/ParsersBuilders.dfy | 134 ++++ src/Parsers/ParsersDisplayers.dfy | 46 ++ src/Parsers/ParsersTests.dfy | 410 ++++++++++++ src/Parsers/StringParsers.dfy | 145 +++++ src/Parsers/StringParsersBuilders.dfy | 15 + src/Parsers/examples/PolynomialParser.dfy | 119 ++++ .../examples/PolynomialParserBuilder.dfy | 112 ++++ src/Parsers/examples/Tutorial.dfy | 83 +++ src/Parsers/examples/Tutorial.dfy.expect | 0 10 files changed, 1679 insertions(+) create mode 100644 src/Parsers/Parsers.dfy create mode 100644 src/Parsers/ParsersBuilders.dfy create mode 100644 src/Parsers/ParsersDisplayers.dfy create mode 100644 src/Parsers/ParsersTests.dfy create mode 100644 src/Parsers/StringParsers.dfy create mode 100644 src/Parsers/StringParsersBuilders.dfy create mode 100644 src/Parsers/examples/PolynomialParser.dfy create mode 100644 src/Parsers/examples/PolynomialParserBuilder.dfy create mode 100644 src/Parsers/examples/Tutorial.dfy create mode 100644 src/Parsers/examples/Tutorial.dfy.expect diff --git a/src/Parsers/Parsers.dfy b/src/Parsers/Parsers.dfy new file mode 100644 index 00000000..b9caa7e5 --- /dev/null +++ b/src/Parsers/Parsers.dfy @@ -0,0 +1,615 @@ +include "../Wrappers.dfy" + +abstract module Parsers +// Functional parsers consuming sequences seq from the left to the right. +// For parsers over strings, please refer to the StringParsers module +{ + import Wrappers + + export + provides C, // The character type + Wrappers, // Imported module + Valid, + Succeed, + Epsilon, + Fail, + EndOfString, + Bind, + BindSucceeds, + BindResult, + Map, + Not, + And, + Or, + OrSeq, + Lookahead, + ?, + If, + Maybe, + ConcatMap, + Concat, + ConcatL, + ConcatR, + RepSeq, + Rep, + Recursive, + RecursiveMap, + Any, + Many, + Debug, + intToString, + digitToInt, + stringToInt, + ParseResult.IsFailure, + ParseResult.PropagateFailure, + ParseResult.Extract + reveals + Parser, + ParserSelector, + Option, // From Wrappers + FailureLevel, + ParseResult, + FailureData, + RecursiveDef + + export All reveals * + + type C(!new, ==) + // The character of the sequence being parsed + + type Parser<+R> = seq -> ParseResult + // A parser is a total function from a position to a parse result + // Because it returns a delta pos, it cannot return a position negative from the origing + // If the parsing is out of context, it will return a failure. + + type ParserSelector = string -> Parser + // A parser selector is a function that, given a name that exists, + // returns a parser associated to this name + + type Option = Wrappers.Option + // The common option type, synonym definition + + datatype FailureData = + FailureData( + message: string, + remaining: seq, + next: Option) + // A Parser failure can mention several places + // (e.g. which could have continued to parse) + { + function Concat(other: FailureData): FailureData + // Concatenates two failure datas, the first staying in the front + { + if next == Option.None then + this.(next := Option.Some(other)) + else + FailureData(message, remaining, Option.Some(next.value.Concat(other))) + } + } + + datatype FailureLevel = + // Failure level for parse results. + // A Fatal error results in a unique FailurePosition + // and will be propagated to the top ASAP + // A Recoverable error can typically be processed. + // Comittedness of the parser only depends if the .Remaining() + // of the parse result has moved since the input was provided. + Fatal | Recoverable + + datatype ParseResult<+R> = + // ParseResult is the type of what a parser taking a seq would return + | Failure(level: FailureLevel, data: FailureData) + // Returned if a parser failed. + | Success(result: R, remaining: seq) + // Returned if a parser succeeds, with the increment in the position + { + function Remaining(): seq + // If Remaining() is the same as the input, the parser is "uncommitted", + // which means combinators like Or and RepSeq can try alternatives + { + if Success? then remaining else data.remaining + } + + predicate IsFailure() { + Failure? + } + + predicate IsFatalFailure() { + Failure? && level == Fatal + } + + predicate IsFatal() + requires IsFailure() + { + level == Fatal + } + + function PropagateFailure(): ParseResult + requires IsFailure() + { + Failure(level, data) + } + + function Extract(): (R, seq) + requires !IsFailure() + { + (result, remaining) + } + + function Map(f: R -> R'): ParseResult + // Transforms the result of a successful parse result + { + match this + case Success(result, remaining) => + Success(f(result), remaining) + case Failure(level, data) => + Failure(level, data) + } + + function MapRecoverableError( + f: FailureData -> FailureData + ): ParseResult + // If the result is a recoverable error, + // let the function process it + { + match this + case Failure(Recoverable, data) => + Failure(Recoverable, f(data)) + case _ => this + } + + predicate NeedsAlternative(input: seq) + // Returns true if the parser result is a + // - A failure + // - Is recoverable + // - Did not consume any input (not-committed) + { + Failure? && level == Recoverable && input == Remaining() + } + } + + predicate IsRemaining(input: seq, remaining: seq) + // True if remaining is a suffix of the input + { + && |remaining| <= |input| + && input[|input|-|remaining|..] == remaining + } + + opaque ghost predicate Valid(underlying: Parser) + // A parser is valid iff for any input, it never returns a fatal error + // and always returns a suffix of its input + { + forall input: seq :: + && (underlying(input).Failure? ==> underlying(input).level == Recoverable) + && IsRemaining(input, underlying(input).Remaining()) + } + + // ######################################## + // Parser combinators. + // The following functions make it possible to create and compose parsers + // All these combinators provide Valid() parsers if their inputs are Valid() too + // ######################################## + + opaque function Succeed(result: R): (p: Parser) + // A parser that does not consume any input and returns the given value + { + (input: seq) => Success(result, input) + } + + opaque function Epsilon(): (p: Parser<()>) + // A parser that always succeeds, consumes nothing and returns () + { + Succeed(()) + } + + opaque function Fail(message: string, level: FailureLevel := Recoverable): Parser + // A parser that does not consume any input and returns the given failure + { + (input: seq) => Failure(level, FailureData(message, input, Option.None)) + } + + opaque function EndOfString(): Parser<()> + // A parser that fails if the string has not been entirely consumed + { + (input: seq) => + if |input| == 0 then Success((), input) + else Failure(Recoverable, FailureData("expected end of string", input, Option.None)) + } + + opaque function Bind( + left: Parser, + right: L -> Parser + ) : (p: Parser) + // Fails if the left parser fails. + // If the left parser succeeds, provides its result and the remaining sequence + // to the right parser generator. + // For a more general version, look at BindSucceeds + { + (input: seq) + => + var (leftResult, remaining) :- left(input); + right(leftResult)(remaining) + } + + opaque function BindSucceeds( + left: Parser, + right: (L, seq) -> Parser + ) : (p: Parser) + // Fails if the left parser fails. + // If the left parser succeeds, provides its result and its remaining + // to the right parser generator and returns its result applied to the remaining + // For a more general version, look at BindResult + { + (input: seq) + => + var (leftResult, remaining) :- left(input); + right(leftResult, remaining)(remaining) + } + + opaque function BindResult( + left: Parser, + right: (ParseResult, seq) -> Parser + ) : (p: Parser) + // Given a left parser and a parser generator based on the output + // of the left parser, + // returns the result of the right parser applied on the original input + { + (input: seq) + => + right(left(input), input)(input) + } + + opaque function Map(underlying: Parser, mappingFunc: R -> U) + : (p: Parser) + // A parser combinator that makes it possible to transform the result of a parser in another one + // The mapping function can be partial + // ensures forall pos | MapSpec(size, underlying, mappingFunc, pos) :: + // p.requires(pos) + { + (input: seq) => + var (result, remaining) :- underlying(input); + var u := mappingFunc(result); + Success(u, remaining) + } + + opaque function Not(underlying: Parser): Parser<()> + // Returns a parser that succeeds if the underlying parser fails + // and vice-versa. The result does not consume any input + { + (input: seq) => + var l := underlying(input); + if l.IsFailure() then + if l.IsFatal() then l.PropagateFailure() + else Success((), input) + else Failure(Recoverable, FailureData("not failed", input, Option.None)) + } + + opaque function And( + left: Parser, + right: Parser + ) : (p: Parser<(L, R)>) + // Make the two parsers parse the same string and, if both suceed, + // returns a pair of the two results, with the remaining of the right + { + (input: seq) => + var (l, remainingLeft) :- left(input); + var (r, remainingRight) :- right(input); + Success((l, r), remainingRight) + } + + opaque function Or( + left: Parser, + right: Parser + ) : (p: Parser) + // left parses the string. If left succeeds, returns + // if left fails, two cases + // - If the error is recoverable and the parser did not consume input, + // then return what right returns + // - Otherwise return both errors + { + (input: seq) => + var p := left(input); + if !p.NeedsAlternative(input) then p else + var p2 := right(input); + if !p2.NeedsAlternative(input) then p2 else + p2.MapRecoverableError( + dataRight => + p.data.Concat(dataRight)) + } + + opaque function OrSeq( + alternatives: seq> + ): Parser + { + if |alternatives| == 0 then Fail("no alternatives") else + if |alternatives| == 1 then alternatives[0] + else + Or(alternatives[0], OrSeq(alternatives[1..])) + } + + opaque function Lookahead(underlying: Parser): (p: Parser) + // If the underlying parser succeeds, + // returns its result without committing the input + // if the underlying parser fails, + // - If the failure is fatal, returns it as-it + // - If the failure is recoverable, returns it without comitting the input + { + (input: seq) => + var p := underlying(input); + if p.IsFailure() then + if p.IsFatal() then + p + else + p.(data := FailureData(p.data.message, input, Option.None)) + else + p.(remaining := input) + } + + opaque function ?(underlying: Parser): (p: Parser) + // Like Lookahead, except that if the parser succeeds, + // it keeps the committedness of the input. + // Identical to Lookahead, if the underlying parser fails, + // - If the failure is fatal, returns it as-it + // - If the failure is recoverable, returns it without comitting the input + { + (input: seq) => + var p := underlying(input); + if p.IsFailure() then + if p.IsFatal() then + p + else + p.(data := FailureData(p.data.message, input, Option.None)) + else + p + } + + opaque function If( + condition: Parser, + succeed: Parser + ) : (p: Parser) + // If the condifition fails, returns a non-committing failure + // Suitable to use in Or parsers + { + Bind(Lookahead(condition), (l: L) => succeed) + } + + opaque function Maybe(underlying: Parser): Parser> + // Transforms a recoverable failure into None, + // and wraps a success into Some(...) + { + (input: seq) => + var u := underlying(input); + if u.IsFatalFailure() then u.PropagateFailure() + else + if u.Success? then u.Map(result => Option.Some(result)) + else Success(Option.None, input) + } + + opaque function ConcatMap( + left: Parser, + right: Parser, + mapper: (L, R) -> T + ) : (p: Parser) + // Apply two consecutive parsers consecutively + // If both succeed, apply the mapper to the result and return it + { + (input: seq) + => + var (l, remaining) :- left(input); + var (r, remaining2) :- right(remaining); + Success(mapper(l, r), remaining2) + } + + opaque function Concat( + left: Parser, + right: Parser + ) : (p: Parser<(L, R)>) + // Apply two consecutive parsers consecutively + // If both succeed, return the pair of the two results + { + (input: seq) => + var (l, remaining) :- left(input); + var (r, remaining2) :- right(remaining); + Success((l, r), remaining2) + } + + opaque function ConcatR( + left: Parser, + right: Parser + ) : (p: Parser) + // Return only the result of the right parser if the two parsers match + { + ConcatMap(left, right, (l, r) => r) + } + + opaque function ConcatL( + left: Parser, + right: Parser + ) : (p: Parser) + // Return only the result of the right parser if the two parsers match + { + ConcatMap(left, right, (l, r) => l) + } + + opaque function RepSeq( + underlying: Parser + ): Parser> + // Repeats the underlying parser until the first failure + // that accepts alternatives, and returns the underlying sequence + { + Rep(underlying, (result: seq, r: R) => result + [r], []) + } + + opaque function Rep( + underlying: Parser, + combine: (A, B) -> A, + acc: A + ): Parser + // Repeats the underlying parser until the first failure + // that accepts alternatives, combining results to an accumulator + // and return the final accumulator + { + (input: seq) => Rep_(underlying, combine, acc, input) + } + + opaque function {:tailrecursion true} Rep_( + underlying: Parser, + combine: (A, B) -> A, + acc: A, + input: seq + ): (p: ParseResult) + decreases |input| + // RepSeq the underlying parser over the input until a recoverable failure happens + // and returns the accumulated results + { + match underlying(input) + case Success(result, remaining) => + if |remaining| >= |input| then Success(acc, input) else + Rep_(underlying, combine, combine(acc, result), remaining) + case failure => + if failure.NeedsAlternative(input) then + Success(acc, input) + else + failure.PropagateFailure() + } + + opaque function Recursive( + underlying: Parser -> Parser + ): (p: Parser) + // Given a function that requires a parser to return a parser, + // provide the result of this parser to that function itself. + // Careful: This function is not tail-recursive and will consume stack. + // Prefer using Rep() or RepSeq() for sequences + { + (input: seq) => Recursive_(underlying, input) + } + + opaque function Recursive_( + underlying: Parser -> Parser, + input: seq + ): (p: ParseResult) + // Implementation for Recursive() + decreases |input| + { + var callback: Parser := + (remaining: seq) => + if |remaining| < |input| then + Recursive_(underlying, remaining) + else if |remaining| == |input| then + Failure(Recoverable, FailureData("no progress", remaining, Option.None)) + else + Failure(Fatal, FailureData("fixpoint called with an increasing remaining sequence", remaining, Option.None)); + underlying(callback)(input) + } + + opaque function RecursiveMap( + underlying: map>, + fun: string): (p: Parser) + // Given a map of name := recursive definitions, + // provide the result of this parser to the recursive definitions + // and set 'fun' as the initial parser. + // Careful: This function is not tail-recursive and will consume stack + { + (input: seq) => RecursiveMap_(underlying, fun, input) + } + + datatype RecursiveDef = RecursiveDef( + order: nat, + definition: ParserSelector -> Parser + ) // The order must be decreasing every time the function steps in + // But it can jump to a bigger order if the input is consumed + + opaque function RecursiveMap_( + underlying: map>, + fun: string, + input: seq + ): (p: ParseResult) + // Implementation for RecursiveMap() + decreases |input|, if fun in underlying then underlying[fun].order else 0 + { + if fun !in underlying then Failure(Fatal, FailureData("parser '"+fun+"' not found", input, Option.None)) else + var RecursiveDef(orderFun, definitionFun) := underlying[fun]; + var callback: ParserSelector + := + (fun': string) => + (var p : Parser := + if fun' !in underlying.Keys then + Fail(fun' + " not defined", Fatal) + else + var RecursiveDef(orderFun', definitionFun') := underlying[fun']; + (remaining: seq) => + if |remaining| < |input| || (|remaining| == |input| && orderFun' < orderFun) then + RecursiveMap_(underlying, fun', remaining) + else if |remaining| == |input| then + Failure(Recoverable, FailureData("non-progressing recursive call requires that order of '" + +fun'+"' ("+intToString(orderFun')+") is lower than the order of '"+fun+"' ("+intToString(orderFun)+")", remaining, Option.None)) + else + Failure(Fatal, FailureData("parser did not return a suffix of the input", remaining, Option.None)) + ; p); + definitionFun(callback)(input) + } + + opaque function Any(test: C -> bool, name: string): (p: Parser) + // A parser that returns the current char if it passes the test + // Returns a recoverable error based on the name otherwise + { + (input: seq) => + if 0 < |input| && test(input[0]) then Success(input[0], input[1..]) + else Failure(Recoverable, + FailureData("expected a "+name, input, Option.None)) + } + + opaque function Many(test: C -> bool, name: string): (p: Parser>) + { + Bind(Any(test, name), (c: C) => + Rep(Any(test, name), (s: seq, c': C) => s + [c'], [c])) + } + + function Debug_(message: string): string { + message + } by method { + print message, "\n"; + return message; + } + + opaque function Debug(msg: string, other: Parser): (p: Parser) + // A parser that, when invoked, will print a message before applying its underlying parser + // and also afterwards + { + (input: seq) => + var _ := Debug_(msg + "(before)"); + var p := other(input); + var _ := Debug_(msg + "(after)"); + p + } + + opaque function intToString(n: int): string + // Converts an integer to a string + decreases if n < 0 then 1 - n else n + { + if n < 0 then "-" + intToString(-n) else + match n + case 0 => "0" case 1 => "1" case 2 => "2" case 3 => "3" case 4 => "4" + case 5 => "5" case 6 => "6" case 7 => "7" case 8 => "8" case 9 => "9" + case _ => intToString(n / 10) + intToString(n % 10) + } + + opaque function digitToInt(c: char): int { + match c + case '0' => 0 case '1' => 1 case '2' => 2 case '3' => 3 case '4' => 4 + case '5' => 5 case '6' => 6 case '7' => 7 case '8' => 8 case '9' => 9 + case _ => -1 + } + + opaque function stringToInt(s: string): int + // Converts a string to a string + decreases |s| + { + if |s| == 0 then 0 else + if |s| == 1 then digitToInt(s[0]) + else if s[0] == '-' then + 0 - stringToInt(s[1..]) + else + stringToInt(s[0..|s|-1])*10 + stringToInt(s[|s|-1..|s|]) + } +} \ No newline at end of file diff --git a/src/Parsers/ParsersBuilders.dfy b/src/Parsers/ParsersBuilders.dfy new file mode 100644 index 00000000..02c3d1bb --- /dev/null +++ b/src/Parsers/ParsersBuilders.dfy @@ -0,0 +1,134 @@ +include "Parsers.dfy" + +// Nice wanna-to-be DSL to build parsers to avoid too much parenthesis nesting +// B(p) returns a parser builder from a normal parser. +// B1.o_I(B2) will parse both but return the result of B2 +// B1.I_o(B2) will parse both but return the result of B1 +// B.M(f) will map the result of the parser builder by f if succeeded +// B1.O(B2) will either parse B1, or B2 if B1 fails with Recoverable +// FirstOf([B1, B2, B3]) +// will parse with B1, but if B1 fails with Recoverable, +// it will parse with B2, but if B2 fails with Recoverable, +// it will parse with B3 +// R(v) returns a parser builder that returns immediately v +// +// There are more parser builders in the trait Engine, when their spec depends on +// a predetermined input, e.g. to tests for constant strings + +abstract module ParserBuilders { + import P: Parsers + export + provides P + provides O + provides Ok + provides Fail + provides Rec + provides B.e_I + provides B.I_e + provides B.I_I + provides B.M + provides B.Maybe + provides B.Bind + provides B.Rep + provides End + provides Any, Many + reveals B + reveals RecDef, FailureLevel, RecSel + + type FailureLevel = P.FailureLevel + type RecSel = string -> B + + // Wrap the constructor in a class where the size is constant so that users + // don'result need to provide it. + datatype B = B(apply: P.Parser) + { + function Maybe(): B> { + B(P.Maybe(apply)) + } + function e_I(other: B): (p: B) + // Excludes the left, includes the right + { + B(P.ConcatR(apply, other.apply)) + } + function I_e(other: B): (p: B) + // Includes the left, excludes the right + { + B(P.ConcatL(apply, other.apply)) + } + function I_I(other: B): (p: B<(R, U)>) + // Includes the left, excludes the right + { + B(P.Concat(apply, other.apply)) + } + function M(mappingFunc: R -> U): (p: B) + // Maps the result + { + B(P.Map(apply, mappingFunc)) + } + function Bind(other: R -> B): (p: B) + { + B(P.Bind(apply, (result: R) => other(result).apply)) + } + + function Rep(init: A, combine: (A, R) -> A): (p: B) + { + B(P.Rep(apply, combine, init)) + } + } + + function Ok(result: R): (p: B) + { + B(P.Succeed(result)) + } + + function Fail(message: string, level: FailureLevel := FailureLevel.Recoverable): (p: B) + { + B(P.Fail(message, level)) + } + + function O(alternatives: seq>): B + // Declares a set of alternatives as a single list + { + if |alternatives| == 0 then Fail("no alternative") else + if |alternatives| == 1 then alternatives[0] + else + B(P.Or(alternatives[0].apply, O(alternatives[1..]).apply)) + } + + function End(): B<()> + { + B(P.EndOfString()) + } + + function Any(test: P.C -> bool, name: string): B + { + B(P.Any(test, name)) + } + + function Many(test: P.C -> bool, name: string): B> + { + B(P.Many(test, name)) + } + + + datatype RecDef = RecDef( + order: nat, + definition: RecSel -> B) + + opaque function Rec( + underlying: map>, + fun: string): (p: B) + { + B(P.RecursiveMap( + map k <- underlying :: k := + P.RecursiveDef( + underlying[k].order, + (selector: P.ParserSelector) => + underlying[k].definition( + (name: string) => + B(selector(name)) + ).apply), + fun + )) + } +} diff --git a/src/Parsers/ParsersDisplayers.dfy b/src/Parsers/ParsersDisplayers.dfy new file mode 100644 index 00000000..90b71a3c --- /dev/null +++ b/src/Parsers/ParsersDisplayers.dfy @@ -0,0 +1,46 @@ +include "Parsers.dfy" + +// From these parsers, we can create displayers +// and prove the roundtrip displayer / parser if we wanted to +abstract module ParsersDiplayers { + import Parsers`All + + type Parser = Parsers.Parser + type C = Parsers.C + + type Displayer<-R> = (R, seq) -> seq + + function Concat( + left: Displayer, + right: Displayer + ): Displayer<(A, B)> { + (ab: (A, B), remaining: seq) => + var remaining2 := right(ab.1, remaining); + var remaining3 := left(ab.0, remaining2); + remaining3 + } + + ghost predicate Roundtrip(parse: Parser, display: Displayer) + // The parser and the displayer are dual to each other + // means that if we parse after printing, we get the same result + { + forall a: A, remaining: seq :: + parse(display(a, remaining)) == Parsers.Success(a, remaining) + } + + lemma {:rlimit 1000} ConcatRoundtrip( + pA: Parser, ppA: Displayer, + pB: Parser, ppB: Displayer + ) + requires Roundtrip(pA, ppA) && Roundtrip(pB, ppB) + ensures Roundtrip(Parsers.Concat(pA, pB), Concat(ppA, ppB)) + { + reveal Parsers.Concat(); + var p := Parsers.Concat(pA, pB); + var d := Concat(ppA, ppB); + forall ab: (A, B), remaining: seq ensures + p(d(ab, remaining)) == Parsers.Success(ab, remaining) + { + } + } +} \ No newline at end of file diff --git a/src/Parsers/ParsersTests.dfy b/src/Parsers/ParsersTests.dfy new file mode 100644 index 00000000..5825c712 --- /dev/null +++ b/src/Parsers/ParsersTests.dfy @@ -0,0 +1,410 @@ +include "Parsers.dfy" + +abstract module ParserTests refines Parsers { + lemma AboutSucceed(result: R, input: seq) + ensures + var p := Succeed(result); + && p(input).Success? + && p(input).remaining == input + { reveal Succeed(); } + + lemma AboutFail_(message: string, level: FailureLevel, input: seq) + ensures + var p := Fail(message, level)(input); + && p.Failure? + && p.data == FailureData(message, input, Option.None) + && p.level == level + { + reveal Fail(); + } + + lemma AboutFail_2(message: string, input: seq) + ensures + var p := Fail(message)(input); + && p.Failure? + && p.level == Recoverable + && p.data == FailureData(message, input, Option.None) + { + reveal Fail(); + } + + lemma AboutBind_( + left: Parser, + right: (L, seq) -> Parser, + input: seq + ) + ensures + var p := BindSucceeds(left, right)(input); + && var leftResult := left(input); + && !leftResult.IsFailure() + ==> var leftValues := left(input).Extract(); + && var rightResult := right(leftValues.0, leftValues.1)(leftValues.1); + && !rightResult.IsFailure() + ==> && !p.IsFailure() + && p.remaining == rightResult.remaining + && p.result == rightResult.result + { + reveal BindSucceeds(); + } + + lemma AboutMap_(underlying: Parser, mappingFunc: R -> U, input: seq) + ensures var p := Map(underlying, mappingFunc); + && (underlying(input).Success? <==> p(input).Success?) + && (p(input).Success? ==> + && p(input).remaining == underlying(input).remaining + && p(input).result == mappingFunc(underlying(input).result)) + { + reveal Map(); + reveal BindSucceeds(); + reveal Succeed(); + } + + function BindMapCallback(mappingFunc: R -> U): + (R, seq) -> Parser + { + (result: R, remaining: seq) => Succeed(mappingFunc(result)) + } + + lemma AboutMap_Bind_(underlying: Parser, mappingFunc: R -> U, input: seq) + ensures Map(underlying, mappingFunc)(input) + == BindSucceeds(underlying, BindMapCallback(mappingFunc))(input) + { + reveal Map(); + reveal BindSucceeds(); + reveal Succeed(); + } + + lemma AboutConcat( + left: Parser, + right: Parser, + input: seq) + ensures var p := Concat(left, right); + && (p(input).Success? ==> + && left(input).Success? + && p(input).result.0 == left(input).result + && var input2 := left(input).remaining; + && right(input2).Success? + && p(input).result.1 == right(input2).result + && p(input).remaining == right(input2).remaining) + { + reveal Concat(); + reveal ConcatMap(); + } + + function BindConcatCallback(right: Parser): (L, seq) -> Parser<(L, R)> + { + (l: L, remaining: seq) => + Map(right, (r: R) => (l, r)) + } + + lemma AboutConcatBind_( + left: Parser, + right: Parser, + input: seq) + ensures Concat(left, right)(input) == BindSucceeds(left, BindConcatCallback(right))(input) + { + reveal Concat(); + reveal BindSucceeds(); + reveal Succeed(); + reveal Map(); + reveal ConcatMap(); + } + + lemma AboutConcatR( + left: Parser, + right: Parser, + input: seq) + ensures var p := ConcatR(left, right); + && (p(input).Success? ==> + && left(input).Success? + && var input2 := left(input).remaining; + && right(input2).Success? + && p(input).result == right(input2).result + && p(input).remaining == right(input2).remaining) + { + reveal ConcatR(); + reveal ConcatMap(); + } + + function first(): ((L, R)) -> L { + (lr: (L, R)) => lr.0 + } + function second(): ((L, R)) -> R { + (lr: (L, R)) => lr.1 + } + lemma AboutConcatConcatR( + left: Parser, + right: Parser, + input: seq) + ensures Map(Concat(left, right), second())(input) == ConcatR(left, right)(input) + { + reveal Concat(); + reveal Succeed(); + reveal ConcatR(); + reveal Map(); + reveal ConcatMap(); + } + + + lemma AboutConcatL( + left: Parser, + right: Parser, + input: seq) + ensures var p := ConcatL(left, right); + && (p(input).Success? ==> + && left(input).Success? + && var input2 := left(input).remaining; + && right(input2).Success? + && p(input).result == left(input).result + && p(input).remaining == right(input2).remaining) + { + reveal ConcatL(); + reveal ConcatMap(); + } + lemma AboutConcatConcatL( + left: Parser, + right: Parser, + input: seq) + ensures Map(Concat(left, right), first())(input) == ConcatL(left, right)(input) + { + reveal Concat(); + reveal Succeed(); + reveal ConcatL(); + reveal Map(); + reveal ConcatMap(); + } + + predicate AboutRepIncreasesPosIfUnderlyingSucceedsAtLeastOnceEnsures( + underlying: Parser, + acc: seq, + input: seq + ) + { + var result := RepSeq(underlying)(input); + && result.Success? + && |acc| <= |result.result| + && (underlying(input).Success? && |underlying(input).remaining| < |input| + ==> + (|acc| < |result.result| && |result.remaining| < |input|)) + } + + predicate AboutFix_Ensures( + underlying: Parser -> Parser, + input: seq) + { + var p := Recursive_(underlying, input); + p.Success? ==> IsRemaining(input, p.remaining) + } + + lemma {:vcs_split_on_every_assert} AboutFix_( + underlying: Parser -> Parser, + input: seq) + requires + forall callback: Parser, u: seq + | underlying(callback)(u).Success? + :: IsRemaining(input, underlying(callback)(input).Remaining()) + ensures AboutFix_Ensures(underlying, input) + { + reveal Recursive_(); + } + + + predicate AboutRecursiveMap_Ensures( + underlying: map>, + fun: string, + input: seq + ) { + var p := RecursiveMap_(underlying, fun, input); + && (p.Success? ==> IsRemaining(input, p.remaining)) + } + + + lemma Succeed_NonCrashing(result: R) + ensures Valid(Succeed(result)) + { reveal Valid(), Succeed(); } + + lemma Succeed_NonCrashingAuto() + ensures forall result: R :: Valid(Succeed(result)) + { reveal Valid(), Succeed(); } + + lemma Epsilon_NonCrashing() + ensures Valid(Epsilon()) + { reveal Valid(), Epsilon(); Succeed_NonCrashing(()); } + + lemma AboutEpsilon_(input: seq) + ensures + var p := Epsilon(); + && p(input).Success? + && p(input).remaining == input + { + reveal Epsilon(); + reveal Succeed(); + } + + lemma Fail_NonCrashing(message: string) + ensures Valid(Fail(message, Recoverable)) + { reveal Fail(); reveal Valid(); } + + lemma Fail_NonCrashingAuto() + ensures forall message :: Valid(Fail(message, Recoverable)) + { reveal Fail(); reveal Valid(); } + + ghost predicate BindRightNonCrashing(right: (L, seq) -> Parser) { + forall l: L, input: seq :: Valid(right(l, input)) + } + + lemma Bind_NonCrashing( + left: Parser, + right: (L, seq) -> Parser + ) + requires Valid(left) + requires BindRightNonCrashing(right) + ensures Valid(BindSucceeds(left, right)) + { + reveal BindSucceeds(), Valid(); + var p := BindSucceeds(left, right); + forall input: seq ensures + && (p(input).Failure? ==> p(input).level == Recoverable) + && IsRemaining(input, p(input).Remaining()) + { + + } + } + + ghost predicate Bind_NonCrashingRight(left: Parser) + requires Valid(left) + { + forall right: (L, seq) -> Parser | BindRightNonCrashing(right) :: + Valid(BindSucceeds(left, right)) + } + + lemma Bind_NonCrashingAuto() + ensures forall left: Parser | Valid(left) :: + Bind_NonCrashingRight(left) + { + forall left: Parser | Valid(left), + right: (L, seq) -> Parser | BindRightNonCrashing(right) + ensures + Valid(BindSucceeds(left, right)) + { + Bind_NonCrashing(left, right); + } + } + + lemma intToStringThenStringToIntIdem(n: int) + decreases if n < 0 then 1 - n else n + ensures 0 <= n ==> 1 <= |intToString(n)| && intToString(n)[0] != '-' + ensures stringToInt(intToString(n)) == n + { // Proof is automatic + reveal intToString(), stringToInt(), digitToInt(); + if n < 0 { + calc { + stringToInt(intToString(n)); + stringToInt("-" + intToString(-n)); + 0 - stringToInt(intToString(-n)); + { intToStringThenStringToIntIdem(-n); } + n; + } + } else if 0 <= n <= 9 { + assert stringToInt(intToString(n)) == n; + } else { + assert intToString(n) == intToString(n / 10) + intToString(n % 10); + var s := intToString(n); + } + } + opaque predicate IsStringInt(s: string): (b: bool) + ensures b ==> |s| > 0 + { + |s| > 0 && + if s[0] == '-' then + |s| > 1 && s[1] != '0' && + (forall i | 1 <= i < |s| :: s[i] in "0123456789") + else + (|s| > 1 ==> s[0] != '0') && + (forall i | 0 <= i < |s| :: s[i] in "0123456789") + } + + lemma stringToIntNonnegative(s: string) + requires IsStringInt(s) + requires s[0] != '-' + decreases |s| + ensures 0 <= stringToInt(s) + ensures s != "0" ==> 0 < stringToInt(s) + ensures |s| > 1 ==> 10 <= stringToInt(s) + { + if |s| == 0 { + + } else if |s| == 1 { + reveal digitToInt(), stringToInt(), IsStringInt(); + match s[0] + case '0' => case '1' => case '2' => case '3' => case '4' => + case '5' => case '6' => case '7' => case '8' => case '9' => + case _ => + } else if s[0] == '-' { + } else { + assert !(|s| == 0 || |s| == 1 || s[0] == '-'); + reveal stringToInt(); + assert stringToInt(s) == stringToInt(s[0..|s|-1])*10 + stringToInt(s[|s|-1..|s|]); + assert IsStringInt(s[0..|s|-1]) by { + reveal IsStringInt(); + } + stringToIntNonnegative(s[..|s|-1]); + var tail := s[|s|-1..|s|]; + assert IsStringInt(tail) && tail[0] != '-' by { + reveal IsStringInt(); + } + stringToIntNonnegative(tail); + reveal IsStringInt(); + assert |s| > 1 ==> 10 <= stringToInt(s); + } + } + + lemma stringToIntThenIntToStringIdem(s: string) + requires IsStringInt(s) + decreases |s| + ensures s[0] != '-' ==> 0 <= stringToInt(s) + ensures |s| == 1 ==> 0 <= stringToInt(s) <= 9 + ensures intToString(stringToInt(s)) == s + { + assert |s| > 0; + if 1 <= |s| && s[0] == '-' { + reveal intToString(), stringToInt(), IsStringInt(); + assert forall i | 1 <= i < |s| :: s[i] in "0123456789"; + calc { + intToString(stringToInt(s)); + intToString(0 - stringToInt(s[1..])); + } + } else if |s| == 1 { + reveal intToString(), stringToInt(), IsStringInt(), digitToInt(); + calc { + intToString(stringToInt(s)); + s; + } + } else { + var n := stringToInt(s); + stringToIntNonnegative(s); + var init := s[..|s|-1]; + var last := s[|s|-1..|s|]; + var q := stringToInt(init); + var r := stringToInt(last); + assert IsStringInt(init) by { reveal IsStringInt(); } + assert IsStringInt(last) by { reveal IsStringInt(); } + stringToIntThenIntToStringIdem(init); + stringToIntThenIntToStringIdem(last); + assert stringToInt(s) == + stringToInt(s[0..|s|-1])*10 + stringToInt(s[|s|-1..|s|]) by { + reveal stringToInt(); + } + assert n == q * 10 + r; + calc { + intToString(n); + { reveal intToString(); + assert !(n < 0); + assert n != 0; + } + intToString(n / 10) + intToString(n % 10); + s; + } + } + } +} diff --git a/src/Parsers/StringParsers.dfy b/src/Parsers/StringParsers.dfy new file mode 100644 index 00000000..1557a8cc --- /dev/null +++ b/src/Parsers/StringParsers.dfy @@ -0,0 +1,145 @@ +include "Parsers.dfy" + +module StringParsers refines Parsers { + export StringParsers extends Parsers + provides + Char, + Digit, + DigitNumber, + Nat, + Int, + String, + ExtractLineCol, + PrintFailure, + Wrappers + reveals C + + type C = char + + // ################################## + // String-specific parser combinators + // ################################## + + opaque function Char(expectedChar: char): (p: Parser) + // A parser that tests if the current char is the given expected char + { + Any((c: char) => c == expectedChar, [expectedChar]) + } + + opaque function Digit(): (p: Parser) + // A parser that tests if the current char is a digit and returns it + { + Any(c => c in "0123456789", "digit") + } + + opaque function DigitNumber(): (p: Parser) + // A parser that returns the current char as a number if it is one + { + Map(Digit(), (c: char) => + var d := digitToInt(c); + var n: nat := if d >= 0 then d else 0; + n + ) + } + + opaque function Nat(): (p: Parser) + // A parser that parses a natural number + { + Bind(DigitNumber(), + (result: nat) => + Rep(DigitNumber(), + (previous: nat, c: nat) => + var r: nat := previous * 10 + c; r, + result + ) + ) + } + + opaque function Int(): (p: Parser) + // A parser that parses a integer, possibly negative + { + Bind(Maybe(Char('-')), + (minusSign: Option) => + Map(Nat(), (result: nat) => if minusSign.Some? then 0-result else result)) + } + + opaque function String(expected: string): (p: Parser) + // A parser that succeeds only if the input starts with the given string + { + (input: string) => + if |expected| <= |input| && input[0..|expected|] == expected then Success(expected, input[|expected|..]) + else Failure(Recoverable, FailureData("expected '"+expected+"'", input, Option.None)) + } + + // ######################## + // Error handling utilities + // ######################## + + function repeat_(str: string, n: nat): (r: string) + // Repeats the given string n times + ensures |r| == |str| * n + { + if n == 0 then "" + else str + repeat_(str, n-1) + } + + method ExtractLineCol(input: string, pos: nat) + returns (lineNumber: nat, lineStr: string, colNumber: nat) + // Returns the line number, the extracted line, and the column number + // corresponding to a given position in the given input + { + lineNumber := 1; + var startLinePos: nat := 0; + colNumber := 0; + var i := 0; + while i < |input| && i != pos + invariant 0 <= startLinePos <= i <= |input| + { + colNumber := colNumber + 1; + if input[i] == '\r' && i + 1 < |input| && input[i+1] == '\n' { + lineNumber := lineNumber + 1; + colNumber := 0; + i := i + 1; + startLinePos := i + 1; + } else if input[i] in "\r\n" { + lineNumber := lineNumber + 1; + colNumber := 0; + startLinePos := i + 1; + } + i := i + 1; + } + while i < |input| && input[i] !in "\r\n" + invariant startLinePos <= i <= |input| + { + i := i + 1; + } + lineStr := input[startLinePos..i]; + } + + method PrintFailure(input: string, result: ParseResult, printPos: int := -1) + // Util to print the line, the column, and all the error messages + // associated to a given parse failure + requires result.Failure? + decreases result.data + { + if printPos == -1 { + print if result.level == Fatal then "Fatal error" else "Error", ":\n"; + } + var pos: int := |input| - |result.data.remaining|; // Need the parser to be Valid() + if pos < 0 { // Could be proved false if parser is Valid() + pos := 0; + } + if printPos != pos { + var line, lineStr, col := ExtractLineCol(input, pos); + print line, ": ", lineStr, "\n"; + print repeat_(" ", col + 2 + |intToString(line)|), "^","\n"; + } + print result.data.message; + if result.data.next.Some? { + print ", or\n"; + PrintFailure(input, Failure(result.level, result.data.next.value), pos); + } else { + print "\n"; + } + } +} \ No newline at end of file diff --git a/src/Parsers/StringParsersBuilders.dfy b/src/Parsers/StringParsersBuilders.dfy new file mode 100644 index 00000000..33597231 --- /dev/null +++ b/src/Parsers/StringParsersBuilders.dfy @@ -0,0 +1,15 @@ +include "StringParsers.dfy" +include "ParsersBuilders.dfy" + +module StringParsersBuilders refines ParserBuilders { + import P = StringParsers + export StringParsersBuilders extends ParserBuilders + provides String, Int + + function String(s: string): B { + B(P.String(s)) + } + function Int(): B { + B(P.Int()) + } +} \ No newline at end of file diff --git a/src/Parsers/examples/PolynomialParser.dfy b/src/Parsers/examples/PolynomialParser.dfy new file mode 100644 index 00000000..73cf8882 --- /dev/null +++ b/src/Parsers/examples/PolynomialParser.dfy @@ -0,0 +1,119 @@ +include "../StringParsers.dfy" + +module PolynomialParser { + import opened P = StringParsers + + // Parser combinators style + const parser: Parser + := ConcatL(RecursiveMap(map[ + "atom" := RecursiveDef(0, (callback: ParserSelector) => + Or(ConcatR( + String("("), ConcatL( + callback("term"), + String(")"))), + Or( + Map(Int(), (result: int) => Number(result)), ConcatR( + String("x"), + Map(Maybe(ConcatR( + String("^"), Int())), + (result: Option) => + if result.Some? then Unknown(result.value) else Unknown(1) + ))))), + "factor" := RecursiveDef(1, (callback: ParserSelector) => + Bind(callback("atom"), (atom: Expr) => + Rep( + Concat(Or(String("*"), Or(String("/"), String("%"))), + callback("atom")), + Expr.InfixBuilder(), atom) + )), + + "term" := RecursiveDef(2, (callback: ParserSelector) => + Bind(callback("factor"), (factor: Expr) => + Rep( + Concat(Or(String("+"), String("-")), + callback("factor")), + Expr.InfixBuilder(), factor) + )) + ], "term"), EndOfString()) + + type Result = Wrappers.Result + + datatype Expr = + | Binary(op: string, left: Expr, right: Expr) + | Number(value: int) + | Unknown(power: int) + { + + function Simplify(): Result { + match this { + case Number(x: int) => Result.Success(this) + case Binary(op, left, right) => + var l :- left.Simplify(); + var r :- right.Simplify(); + if l.Number? && r.Number? then + match op { + case "+" => Result.Success(Number(l.value + r.value)) + case "-" => Result.Success(Number(l.value - r.value)) + case "*" => Result.Success(Number(l.value * r.value)) + case "/" => + if r.value == 0 then + Result.Failure("Division by zero (" + right.ToString() + + " evaluates to zero)") + else + Result.Success(Number(l.value / r.value)) + case "%" => + if r.value == 0 then + Result.Failure("Modulo by zero (" + right.ToString() + + " evaluates to zero)") + else + Result.Success(Number(l.value % r.value)) + case _ => Result.Failure("Unsupported operator: " + op) + } + else + Result.Success(Binary(op, l, r)) + case Unknown(0) => Result.Success(Number(1)) + case Unknown(_) => Result.Success(this) + } + } + + static function InfixBuilder(): (Expr, (string, Expr)) -> Expr + { + (left: Expr, right: (string, Expr)) => Binary(right.0, left, right.1) + } + + function ToString(): string { + match this + case Number(x) => P.intToString(x) + case Binary(op, left, right) => + "(" + + left.ToString() + op + right.ToString() + + ")" + case Unknown(power) => + if power == 1 then "x" else if power == 0 then "1" else + "x^" + P.intToString(power) + } + } + + method Main(args: seq) { + if |args| <= 1 { + print "Please provide a polynomial to parse as argument\n"; + return; + } + for i := 1 to |args| { + var input := args[i]; + match parser(input) { + case Success(result, remaining) => + print "Polynomial:", result.ToString(), "\n"; + match result.Simplify() { + case Success(x) => + print "Simplified:", x.ToString(), "\n"; + case Failure(message) => + print message; + } + case failure => + PrintFailure(input, failure); + } + print "\n"; + } + } +} \ No newline at end of file diff --git a/src/Parsers/examples/PolynomialParserBuilder.dfy b/src/Parsers/examples/PolynomialParserBuilder.dfy new file mode 100644 index 00000000..0d1b9652 --- /dev/null +++ b/src/Parsers/examples/PolynomialParserBuilder.dfy @@ -0,0 +1,112 @@ +include "../StringParsersBuilders.dfy" + + +module PolynomialParsersBuilder { + import opened StringParsersBuilders + + import P = StringParsersBuilders.P + + // PArsers builder style + const parser: B + := + Rec(map[ + "atom" := RecDef(0, (c: RecSel) => + O([ + String("(").e_I(c("term")).I_e(String(")")), + Int().M((result: int) => Number(result)), + String("x").e_I(String("^").e_I(Int()).Maybe().M( + (result: P.Option) => + if result.Some? then Unknown(result.value) else Unknown(1))) + ])), + + "factor" := RecDef(1, (c: RecSel) => + c("atom").Bind((atom: Expr) => // TODO: Finish this one + O([String("*"), String("/"), String("%")]) + .I_I(c("atom")).Rep(atom, Expr.InfixBuilder()))), + + "term" := RecDef(2, (c: RecSel) => + c("factor").Bind((atom: Expr) => + O([String("+"), String("-")]) + .I_I(c("factor")).Rep(atom, Expr.InfixBuilder()))) + ], "term") + .I_e(End()) + + type Result = StringParsersBuilders.P.Wrappers.Result + + datatype Expr = + | Binary(op: string, left: Expr, right: Expr) + | Number(value: int) + | Unknown(power: int) + { + + function Simplify(): Result { + match this { + case Number(x: int) => Result.Success(this) + case Binary(op, left, right) => + var l :- left.Simplify(); + var r :- right.Simplify(); + if l.Number? && r.Number? then + match op { + case "+" => Result.Success(Number(l.value + r.value)) + case "-" => Result.Success(Number(l.value - r.value)) + case "*" => Result.Success(Number(l.value * r.value)) + case "/" => + if r.value == 0 then + Result.Failure("Division by zero (" + right.ToString() + + " evaluates to zero)") + else + Result.Success(Number(l.value / r.value)) + case "%" => + if r.value == 0 then + Result.Failure("Modulo by zero (" + right.ToString() + + " evaluates to zero)") + else + Result.Success(Number(l.value % r.value)) + case _ => Result.Failure("Unsupported operator: " + op) + } + else + Result.Success(Binary(op, l, r)) + case Unknown(0) => Result.Success(Number(1)) + case Unknown(_) => Result.Success(this) + } + } + static function InfixBuilder(): (Expr, (string, Expr)) -> Expr + { + (left: Expr, right: (string, Expr)) => Binary(right.0, left, right.1) + } + function ToString(): string { + match this + case Number(x) => P.intToString(x) + case Binary(op, left, right) => + "(" + + left.ToString() + op + right.ToString() + + ")" + case Unknown(power) => + if power == 1 then "x" else if power == 0 then "1" else + "x^" + P.intToString(power) + } + } + + method Main(args: seq) { + if |args| <= 1 { + print "Please provide a polynomial to parse as argument\n"; + return; + } + for i := 1 to |args| { + var input := args[i]; + match parser.apply(input) { + case Success(result, remaining) => + print "Polynomial:", result.ToString(), "\n"; + match result.Simplify() { + case Success(x) => + print "Simplified:", x.ToString(), "\n"; + case Failure(message) => + print message; + } + case failure => + P.PrintFailure(input, failure); + } + print "\n"; + } + } +} \ No newline at end of file diff --git a/src/Parsers/examples/Tutorial.dfy b/src/Parsers/examples/Tutorial.dfy new file mode 100644 index 00000000..d934c915 --- /dev/null +++ b/src/Parsers/examples/Tutorial.dfy @@ -0,0 +1,83 @@ +// RUN: %test "%s" + +include "../stringParsersBuilders.dfy" + +module Tutorial.Parsers { + import opened StringParsers + + method {:test} TestSplit1() { + var nonComma: Parser := + Many((c: char) => c != ',', "non-comma"); + var p := + Bind(nonComma, (result: string) => + Rep(ConcatR(String(","), nonComma), + (acc, elem) => acc + [elem], + [result] + )); + + expect p("abc,d,efg") == ParseResult.Success(["abc","d","efg"], ""); + expect p("abc,d,,") == + ParseResult.Failure(Recoverable, FailureData("expected a non-comma", ",", Option.None)); + PrintFailure("abc,d,,", p("abc,d,,")); + // Displays + // Error: + // 1: abc,d,, + // ^ + // expected a non-comma + } + + function flatten(): ((A, (A, A))) -> (A, A, A) { + (input: (A, (A, A))) => + (input.0, input.1.0, input.1.1) + } + + method {:test} TestTicTacToe() { + var x := OrSeq([ + String("O"), String("X"), String(" ") + ]); + var v := String("|"); + var row := Map(Concat(x, ConcatR(v, Concat(x, ConcatR(v, x)))), + flatten()); + var sep := String("\n-+-+-\n"); + var grid := Map( + Concat(row, ConcatR(sep, Concat(row, ConcatR(sep, row)))), + flatten<(string, string, string)>()) + ; + var input := "O|X| \n-+-+-\nX|O| \n-+-+-\nP| |O"; + // 012345 678901 234567 890123 45678 + var r := grid(input); + expect r.IsFailure(); + expect |input| - |r.data.remaining| == 24; + expect r.data.message == "expected 'O'"; + expect r.data.next.Some?; + expect r.data.next.value.message == "expected 'X'"; + expect r.data.next.value.next.Some?; + expect r.data.next.value.next.value.message == "expected ' '"; + expect r.data.next.value.next.value.next.None?; + PrintFailure(input, r); + // Displays: + // Error: + // 5: P| |O + // ^ + // expected 'O', or + // expected 'X', or + // expected ' ' + } +} + + +module Tutorial.ParsersBuilders { + import opened StringParsersBuilders + + method {:test} TestSplit1() { + var nonComma: B := + Many((c: char) => c != ',', "non-comma"); + var p := + nonComma.Bind((result: string) => + String(",").e_I(nonComma).Rep([result], + (acc: seq, elem: string) => acc + [elem] + )); + + expect p.apply("abc,d,efg") == P.ParseResult.Success(["abc","d","efg"], ""); + } +} \ No newline at end of file diff --git a/src/Parsers/examples/Tutorial.dfy.expect b/src/Parsers/examples/Tutorial.dfy.expect new file mode 100644 index 00000000..e69de29b From d919b50d38eb1e9ef76c8ca5ee035be7f99ff2b2 Mon Sep 17 00:00:00 2001 From: Mikael Mayer Date: Wed, 15 Nov 2023 16:33:01 -0600 Subject: [PATCH 15/22] Ability to parse small Dafny files --- src/Parsers/Parsers.dfy | 30 ++--- src/Parsers/ParsersBuilders.dfy | 47 +++++--- src/Parsers/ParsersTests.dfy | 38 +++--- src/Parsers/README.md | 2 +- src/Parsers/StringParsers.dfy | 18 ++- src/Parsers/StringParsersBuilders.dfy | 18 ++- src/Parsers/examples/DafnyParser.dfy | 109 ++++++++++++++++++ .../examples/PolynomialParserBuilder.dfy | 16 +-- src/Parsers/examples/Tutorial.dfy | 6 +- 9 files changed, 220 insertions(+), 64 deletions(-) create mode 100644 src/Parsers/examples/DafnyParser.dfy diff --git a/src/Parsers/Parsers.dfy b/src/Parsers/Parsers.dfy index b9caa7e5..a1636d87 100644 --- a/src/Parsers/Parsers.dfy +++ b/src/Parsers/Parsers.dfy @@ -30,12 +30,12 @@ abstract module Parsers Concat, ConcatL, ConcatR, - RepSeq, Rep, + CharTest, + ZeroOrMore, + OneOrMore, Recursive, RecursiveMap, - Any, - Many, Debug, intToString, digitToInt, @@ -105,7 +105,7 @@ abstract module Parsers { function Remaining(): seq // If Remaining() is the same as the input, the parser is "uncommitted", - // which means combinators like Or and RepSeq can try alternatives + // which means combinators like Or and ZeroOrMore can try alternatives { if Success? then remaining else data.remaining } @@ -431,7 +431,7 @@ abstract module Parsers ConcatMap(left, right, (l, r) => l) } - opaque function RepSeq( + opaque function ZeroOrMore( underlying: Parser ): Parser> // Repeats the underlying parser until the first failure @@ -440,6 +440,14 @@ abstract module Parsers Rep(underlying, (result: seq, r: R) => result + [r], []) } + opaque function OneOrMore(underlying: Parser): (p: Parser>) + // Repeats the underlying parser until the first failure + // Will return a failure if there is not at least one match + { + Bind(underlying, (r: R) => + Rep(underlying, (s: seq, r': R) => s + [r'], [r])) + } + opaque function Rep( underlying: Parser, combine: (A, B) -> A, @@ -459,7 +467,7 @@ abstract module Parsers input: seq ): (p: ParseResult) decreases |input| - // RepSeq the underlying parser over the input until a recoverable failure happens + // ZeroOrMore the underlying parser over the input until a recoverable failure happens // and returns the accumulated results { match underlying(input) @@ -479,7 +487,7 @@ abstract module Parsers // Given a function that requires a parser to return a parser, // provide the result of this parser to that function itself. // Careful: This function is not tail-recursive and will consume stack. - // Prefer using Rep() or RepSeq() for sequences + // Prefer using Rep() or ZeroOrMore() for sequences { (input: seq) => Recursive_(underlying, input) } @@ -549,7 +557,7 @@ abstract module Parsers definitionFun(callback)(input) } - opaque function Any(test: C -> bool, name: string): (p: Parser) + opaque function CharTest(test: C -> bool, name: string): (p: Parser) // A parser that returns the current char if it passes the test // Returns a recoverable error based on the name otherwise { @@ -559,12 +567,6 @@ abstract module Parsers FailureData("expected a "+name, input, Option.None)) } - opaque function Many(test: C -> bool, name: string): (p: Parser>) - { - Bind(Any(test, name), (c: C) => - Rep(Any(test, name), (s: seq, c': C) => s + [c'], [c])) - } - function Debug_(message: string): string { message } by method { diff --git a/src/Parsers/ParsersBuilders.dfy b/src/Parsers/ParsersBuilders.dfy index 02c3d1bb..a18cded1 100644 --- a/src/Parsers/ParsersBuilders.dfy +++ b/src/Parsers/ParsersBuilders.dfy @@ -27,24 +27,31 @@ abstract module ParserBuilders { provides B.I_e provides B.I_I provides B.M - provides B.Maybe + provides B.? + provides B.?? provides B.Bind provides B.Rep + provides B.ZeroOrMore + provides B.OneOrMore provides End - provides Any, Many + reveals CharTest reveals B - reveals RecDef, FailureLevel, RecSel + reveals Rec, RecMap + reveals RecMapDef, FailureLevel, RecMapSel type FailureLevel = P.FailureLevel - type RecSel = string -> B + type RecMapSel = string -> B // Wrap the constructor in a class where the size is constant so that users // don'result need to provide it. datatype B = B(apply: P.Parser) { - function Maybe(): B> { + function ?(): B> { B(P.Maybe(apply)) } + function ??(): B { + B(P.?(apply)) + } function e_I(other: B): (p: B) // Excludes the left, includes the right { @@ -74,6 +81,16 @@ abstract module ParserBuilders { { B(P.Rep(apply, combine, init)) } + + function ZeroOrMore(): (p: B>) + { + B(P.ZeroOrMore(apply)) + } + + function OneOrMore(): (p: B>) + { + B(P.OneOrMore(apply)) + } } function Ok(result: R): (p: B) @@ -100,23 +117,25 @@ abstract module ParserBuilders { B(P.EndOfString()) } - function Any(test: P.C -> bool, name: string): B + function CharTest(test: P.C -> bool, name: string): B { - B(P.Any(test, name)) + B(P.CharTest(test, name)) } - function Many(test: P.C -> bool, name: string): B> + opaque function Rec( + underlying: B -> B + ): B { - B(P.Many(test, name)) + B(P.Recursive((p: P.Parser) => + underlying(B(p)).apply)) } - - datatype RecDef = RecDef( + datatype RecMapDef = RecMapDef( order: nat, - definition: RecSel -> B) + definition: RecMapSel -> B) - opaque function Rec( - underlying: map>, + opaque function RecMap( + underlying: map>, fun: string): (p: B) { B(P.RecursiveMap( diff --git a/src/Parsers/ParsersTests.dfy b/src/Parsers/ParsersTests.dfy index 5825c712..dc6d51f4 100644 --- a/src/Parsers/ParsersTests.dfy +++ b/src/Parsers/ParsersTests.dfy @@ -97,7 +97,7 @@ abstract module ParserTests refines Parsers { Map(right, (r: R) => (l, r)) } - lemma AboutConcatBind_( + lemma AboutConcatBindSucceeds( left: Parser, right: Parser, input: seq) @@ -196,7 +196,7 @@ abstract module ParserTests refines Parsers { p.Success? ==> IsRemaining(input, p.remaining) } - lemma {:vcs_split_on_every_assert} AboutFix_( + lemma {:vcs_split_on_every_assert} AboutFix( underlying: Parser -> Parser, input: seq) requires @@ -219,19 +219,19 @@ abstract module ParserTests refines Parsers { } - lemma Succeed_NonCrashing(result: R) + lemma SucceedValid(result: R) ensures Valid(Succeed(result)) { reveal Valid(), Succeed(); } - lemma Succeed_NonCrashingAuto() + lemma SucceedValidAuto() ensures forall result: R :: Valid(Succeed(result)) { reveal Valid(), Succeed(); } - lemma Epsilon_NonCrashing() + lemma EpsilonValid() ensures Valid(Epsilon()) - { reveal Valid(), Epsilon(); Succeed_NonCrashing(()); } + { reveal Valid(), Epsilon(); Succeed_Valid(()); } - lemma AboutEpsilon_(input: seq) + lemma AboutEpsilon(input: seq) ensures var p := Epsilon(); && p(input).Success? @@ -241,24 +241,24 @@ abstract module ParserTests refines Parsers { reveal Succeed(); } - lemma Fail_NonCrashing(message: string) + lemma FailValid(message: string) ensures Valid(Fail(message, Recoverable)) { reveal Fail(); reveal Valid(); } - lemma Fail_NonCrashingAuto() + lemma FailValidAuto() ensures forall message :: Valid(Fail(message, Recoverable)) { reveal Fail(); reveal Valid(); } - ghost predicate BindRightNonCrashing(right: (L, seq) -> Parser) { + ghost predicate BindRightValid(right: (L, seq) -> Parser) { forall l: L, input: seq :: Valid(right(l, input)) } - lemma Bind_NonCrashing( + lemma BindSucceedsValid( left: Parser, right: (L, seq) -> Parser ) requires Valid(left) - requires BindRightNonCrashing(right) + requires BindRightValid(right) ensures Valid(BindSucceeds(left, right)) { reveal BindSucceeds(), Valid(); @@ -271,23 +271,23 @@ abstract module ParserTests refines Parsers { } } - ghost predicate Bind_NonCrashingRight(left: Parser) + ghost predicate BindValidRight(left: Parser) requires Valid(left) { - forall right: (L, seq) -> Parser | BindRightNonCrashing(right) :: + forall right: (L, seq) -> Parser | BindRightValid(right) :: Valid(BindSucceeds(left, right)) } - lemma Bind_NonCrashingAuto() + lemma BindValidAuto() ensures forall left: Parser | Valid(left) :: - Bind_NonCrashingRight(left) + BindValidRight(left) { forall left: Parser | Valid(left), - right: (L, seq) -> Parser | BindRightNonCrashing(right) + right: (L, seq) -> Parser | BindRightValid(right) ensures Valid(BindSucceeds(left, right)) { - Bind_NonCrashing(left, right); + BindValid(left, right); } } @@ -295,7 +295,7 @@ abstract module ParserTests refines Parsers { decreases if n < 0 then 1 - n else n ensures 0 <= n ==> 1 <= |intToString(n)| && intToString(n)[0] != '-' ensures stringToInt(intToString(n)) == n - { // Proof is automatic + { reveal intToString(), stringToInt(), digitToInt(); if n < 0 { calc { diff --git a/src/Parsers/README.md b/src/Parsers/README.md index cd72f5c0..01ed0549 100644 --- a/src/Parsers/README.md +++ b/src/Parsers/README.md @@ -1,6 +1,6 @@ # Verified Parser Combinators -Parser combinators in Dafny, inspired from the model (Meijer 1996). +Parser combinators in Dafny, inspired from the model (Meijer&Hutton 1996). This library offers two styles of functional parser combinators. diff --git a/src/Parsers/StringParsers.dfy b/src/Parsers/StringParsers.dfy index 1557a8cc..a32b4edb 100644 --- a/src/Parsers/StringParsers.dfy +++ b/src/Parsers/StringParsers.dfy @@ -11,7 +11,9 @@ module StringParsers refines Parsers { String, ExtractLineCol, PrintFailure, - Wrappers + Wrappers, + Space, + WS reveals C type C = char @@ -23,13 +25,23 @@ module StringParsers refines Parsers { opaque function Char(expectedChar: char): (p: Parser) // A parser that tests if the current char is the given expected char { - Any((c: char) => c == expectedChar, [expectedChar]) + CharTest((c: char) => c == expectedChar, [expectedChar]) + } + + opaque function Space(): (p: Parser) + { + CharTest(c => c in " \t\r\n", "space") + } + + opaque function WS(): (p: Parser) + { + ZeroOrMore(Space()) } opaque function Digit(): (p: Parser) // A parser that tests if the current char is a digit and returns it { - Any(c => c in "0123456789", "digit") + CharTest(c => c in "0123456789", "digit") } opaque function DigitNumber(): (p: Parser) diff --git a/src/Parsers/StringParsersBuilders.dfy b/src/Parsers/StringParsersBuilders.dfy index 33597231..fb40be9f 100644 --- a/src/Parsers/StringParsersBuilders.dfy +++ b/src/Parsers/StringParsersBuilders.dfy @@ -4,12 +4,26 @@ include "ParsersBuilders.dfy" module StringParsersBuilders refines ParserBuilders { import P = StringParsers export StringParsersBuilders extends ParserBuilders - provides String, Int + provides S, Int, WS, Except, ParseTest - function String(s: string): B { + function S(s: string): B { B(P.String(s)) } function Int(): B { B(P.Int()) } + function WS(): B { + B(P.WS()) + } + function Except(s: string): B { + B(P.ZeroOrMore(P.CharTest((c: char) => c !in s, s))) + } + method ParseTest(p: B, input: string) { + var result := p.apply(input); + if result.Failure? { + P.PrintFailure(input, result); + } else { + print result.result; + } + } } \ No newline at end of file diff --git a/src/Parsers/examples/DafnyParser.dfy b/src/Parsers/examples/DafnyParser.dfy new file mode 100644 index 00000000..82e63360 --- /dev/null +++ b/src/Parsers/examples/DafnyParser.dfy @@ -0,0 +1,109 @@ +// RUN: %test "%s" + +include "../stringParsersBuilders.dfy" + +// A parser that can self-parse +module DafnyParser { + import opened StringParsersBuilders + + type Option = StringParsersBuilders.P.Option + + datatype Program = + Program(includes: seq, declarations: seq) + + datatype Declaration = + | Module(moduleName: Type, declarations: seq) + | Import(opend: bool, imported: Type) + | Datatype(datatypeName: Type, constructors: seq) + | Const(name: string, tpe: Option, constDef: Expr) + | TypeSynonymDecl(typeName: Type, typeArgs: seq, typeDef: Type) + + datatype Constructor = + Constructor(name: string, formals: seq) + + datatype Formal = + Formal(name: Option, tpe: Type) + + datatype Type = + | TypeName(name: string) + | ApplyType(underlying: Type, args: seq) + | SelectType(prefix: Type, field: Type) + { + function applyPrefix(name: string): Type { + match this { + case ApplyType(underlying, args) => ApplyType(underlying.applyPrefix(name), args) + case SelectType(enclosing, field) => SelectType(enclosing.applyPrefix(name), field) + case _ => SelectType(TypeName(name), this) + } + } + } + + datatype Expr = + | TODO + + const stringLit := + S("\"").e_I(Except("\"")).I_e(S("\"")) + + /*const parserImport := S("import").e_I(WS()).e_I( + S("opened").e_I(WS()).Maybe()).I_I(stringLit).M( + (s: (Option, string)) => Import(s.0.Some?, s.1));*/ + const parseInclude := WS().e_I(S("include")).??().e_I(WS()).e_I(stringLit) + + const parseIdentifier := CharTest((c: char) => c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_?$", "Identifier character") + .OneOrMore() + + const parseType: B := + Rec((rec: B) => + parseIdentifier.Bind((id: string) => + var init := TypeName(id); + O([WS().e_I(S("<")).??().e_I( + rec.Bind((t: Type) => + WS().e_I(S(",")).??().e_I(rec).ZeroOrMore() + ).I_e(WS().I_e(S(">"))).M((types: seq) => + ApplyType(TypeName(id), types) + )), + WS().e_I(S(".")).??().e_I(rec).M( + (tpe: Type) => + tpe.applyPrefix(id) + ), + Ok(init) + ]) + )) + + const parseConstructor: B := Fail("parseConstructor not implemented yet") + + const parseDeclaration: B := + Rec((declParser: B) => + O([ + WS().e_I(S("module")).??().e_I(WS()).e_I(parseType).I_e(WS()).I_e(S("{")). + I_I(declParser.ZeroOrMore()).I_e(WS()).I_e(S("}")).M((r: (Type, seq)) => + Module(r.0, r.1)), + WS().e_I(S("import")).??().e_I(WS()).e_I(S("opened").e_I(WS()).?()).I_I(parseType).M( + (s: (Option, Type)) => Import(s.0.Some?, s.1)), + WS().e_I(S("datatype")).??().e_I(WS()).e_I(parseType).I_e(WS().e_I(S("="))).I_I( + parseConstructor.OneOrMore()).M((r: (Type, seq)) => + Datatype(r.0, r.1) + ) + ])) + + const parseProgram := + parseInclude.ZeroOrMore().I_I(parseDeclaration.ZeroOrMore()).M( + (idecls: (seq, seq)) => + Program(idecls.0, idecls.1) + ) + + method {:test} TestParser() { + var program := @" +include ""file"" + +import opened test + +module Test { + module Inner { + + } +} +"; + ParseTest(parseProgram, program); + } +} \ No newline at end of file diff --git a/src/Parsers/examples/PolynomialParserBuilder.dfy b/src/Parsers/examples/PolynomialParserBuilder.dfy index 0d1b9652..3e509e13 100644 --- a/src/Parsers/examples/PolynomialParserBuilder.dfy +++ b/src/Parsers/examples/PolynomialParserBuilder.dfy @@ -9,24 +9,24 @@ module PolynomialParsersBuilder { // PArsers builder style const parser: B := - Rec(map[ - "atom" := RecDef(0, (c: RecSel) => + RecMap(map[ + "atom" := RecMapDef(0, (c: RecMapSel) => O([ - String("(").e_I(c("term")).I_e(String(")")), + S("(").e_I(c("term")).I_e(S(")")), Int().M((result: int) => Number(result)), - String("x").e_I(String("^").e_I(Int()).Maybe().M( + S("x").e_I(S("^").e_I(Int()).?().M( (result: P.Option) => if result.Some? then Unknown(result.value) else Unknown(1))) ])), - "factor" := RecDef(1, (c: RecSel) => + "factor" := RecMapDef(1, (c: RecMapSel) => c("atom").Bind((atom: Expr) => // TODO: Finish this one - O([String("*"), String("/"), String("%")]) + O([S("*"), S("/"), S("%")]) .I_I(c("atom")).Rep(atom, Expr.InfixBuilder()))), - "term" := RecDef(2, (c: RecSel) => + "term" := RecMapDef(2, (c: RecMapSel) => c("factor").Bind((atom: Expr) => - O([String("+"), String("-")]) + O([S("+"), S("-")]) .I_I(c("factor")).Rep(atom, Expr.InfixBuilder()))) ], "term") .I_e(End()) diff --git a/src/Parsers/examples/Tutorial.dfy b/src/Parsers/examples/Tutorial.dfy index d934c915..8249354c 100644 --- a/src/Parsers/examples/Tutorial.dfy +++ b/src/Parsers/examples/Tutorial.dfy @@ -7,7 +7,7 @@ module Tutorial.Parsers { method {:test} TestSplit1() { var nonComma: Parser := - Many((c: char) => c != ',', "non-comma"); + OneOrMore(CharTest((c: char) => c != ',', "non-comma")); var p := Bind(nonComma, (result: string) => Rep(ConcatR(String(","), nonComma), @@ -71,10 +71,10 @@ module Tutorial.ParsersBuilders { method {:test} TestSplit1() { var nonComma: B := - Many((c: char) => c != ',', "non-comma"); + CharTest((c: char) => c != ',', "non-comma").OneOrMore(); var p := nonComma.Bind((result: string) => - String(",").e_I(nonComma).Rep([result], + S(",").e_I(nonComma).Rep([result], (acc: seq, elem: string) => acc + [elem] )); From bf4c4bf2fb902f94afa3e3b655c7e90bdc3a8437 Mon Sep 17 00:00:00 2001 From: Mikael Mayer Date: Thu, 16 Nov 2023 21:03:07 -0600 Subject: [PATCH 16/22] RepSep and JSON parsers --- src/Parsers/Parsers.dfy | 13 +++++ src/Parsers/ParsersBuilders.dfy | 6 ++ src/Parsers/StringParsersBuilders.dfy | 10 +++- src/Parsers/examples/JSONParser.dfy | 81 +++++++++++++++++++++++++++ 4 files changed, 108 insertions(+), 2 deletions(-) create mode 100644 src/Parsers/examples/JSONParser.dfy diff --git a/src/Parsers/Parsers.dfy b/src/Parsers/Parsers.dfy index a1636d87..9139cace 100644 --- a/src/Parsers/Parsers.dfy +++ b/src/Parsers/Parsers.dfy @@ -31,6 +31,7 @@ abstract module Parsers ConcatL, ConcatR, Rep, + RepSep, CharTest, ZeroOrMore, OneOrMore, @@ -460,6 +461,18 @@ abstract module Parsers (input: seq) => Rep_(underlying, combine, acc, input) } + opaque function RepSep( + underlying: Parser, + separator: Parser + ): Parser> + // Repeats the underlying parser interleaved with a separator + // Returns a sequence of results + { + Bind(Maybe(underlying), (result: Option) => + if result.None? then Succeed>([]) else + Rep(ConcatR(separator, underlying), (acc: seq, a: A) => acc + [a], [result.value])) + } + opaque function {:tailrecursion true} Rep_( underlying: Parser, combine: (A, B) -> A, diff --git a/src/Parsers/ParsersBuilders.dfy b/src/Parsers/ParsersBuilders.dfy index a18cded1..65548834 100644 --- a/src/Parsers/ParsersBuilders.dfy +++ b/src/Parsers/ParsersBuilders.dfy @@ -31,6 +31,7 @@ abstract module ParserBuilders { provides B.?? provides B.Bind provides B.Rep + provides B.RepSep provides B.ZeroOrMore provides B.OneOrMore provides End @@ -82,6 +83,11 @@ abstract module ParserBuilders { B(P.Rep(apply, combine, init)) } + function RepSep(separator: B): (p: B>) + { + B(P.RepSep(apply, separator.apply)) + } + function ZeroOrMore(): (p: B>) { B(P.ZeroOrMore(apply)) diff --git a/src/Parsers/StringParsersBuilders.dfy b/src/Parsers/StringParsersBuilders.dfy index fb40be9f..4addec7b 100644 --- a/src/Parsers/StringParsersBuilders.dfy +++ b/src/Parsers/StringParsersBuilders.dfy @@ -4,7 +4,7 @@ include "ParsersBuilders.dfy" module StringParsersBuilders refines ParserBuilders { import P = StringParsers export StringParsersBuilders extends ParserBuilders - provides S, Int, WS, Except, ParseTest + provides S, Int, WS, Except, ParseTest, Digit, DigitNumber function S(s: string): B { B(P.String(s)) @@ -12,6 +12,12 @@ module StringParsersBuilders refines ParserBuilders { function Int(): B { B(P.Int()) } + function Digit(): B { + B(P.Digit()) + } + function DigitNumber(): B { + B(P.DigitNumber()) + } function WS(): B { B(P.WS()) } @@ -23,7 +29,7 @@ module StringParsersBuilders refines ParserBuilders { if result.Failure? { P.PrintFailure(input, result); } else { - print result.result; + print result.result, "\n"; } } } \ No newline at end of file diff --git a/src/Parsers/examples/JSONParser.dfy b/src/Parsers/examples/JSONParser.dfy new file mode 100644 index 00000000..c7220239 --- /dev/null +++ b/src/Parsers/examples/JSONParser.dfy @@ -0,0 +1,81 @@ +// RUN: %test "%s" + +include "../stringParsersBuilders.dfy" + +// A parser that can parse a JSON-like structure +// Strings however are parsed without unicode escape. +module JSONParser { + import opened StringParsersBuilders + datatype Decimal = + Decimal(n: int, e10: int) // (n) * 10^(e10) + + function ToDecimal(n: int): Decimal { + Decimal(n, 0) + } + + function ToDecimalFrac(n: int, digits: seq, e10: int := 0): Decimal + decreases |digits| + { + if digits == [] then Decimal(n, e10) + else ToDecimalFrac(n * 10 + digits[0], digits[1..], e10 - 1) + } + + datatype JSON = + | Null + | Bool(b: bool) + | String(str: string) + | Number(num: Decimal) + | Object(obj: seq<(string, JSON)>) // Not a map to preserve order + | Array(arr: seq) + + const nullParser: B := WS().e_I(S("null")).??().M((s) => Null) + const boolParser: B := WS().e_I(O([S("true"), S("false")])).??().M((s: string) => + Bool(s == "true")) + const stringCharParser: B := + O([ + S("\\\"").??().M((s: string) => '"'), + S("\\\\").??().M((s: string) => '\\'), + CharTest((c: char) => c != '\\' && c != '"', "no escape no quote" + ).??() + ]).ZeroOrMore() + + const stringParser: B := WS().e_I(S("\"")).??().e_I(stringCharParser).I_e(S("\"")) + + const stringJSONParser: B := stringParser.M((s: string) => String(s)) + + const numberJSONParser: B := + Int().I_I(S(".").e_I(DigitNumber().ZeroOrMore()).?()).M( + (s: (int, P.Option>)) => + if s.1.None? then Number(ToDecimal(s.0)) + else Number(ToDecimalFrac(s.0, s.1.value, 0))) + + const arrayParser: B -> B := (rec: B) => + WS().e_I(S("[")).??().e_I( + WS().e_I(rec).RepSep(WS().e_I(S(","))).M((s: seq) => Array(s)) + .I_e(WS()).I_e(S("]"))) + + const objectParser: B -> B := (rec: B) => + WS().e_I(S("{")).??().e_I( + WS().e_I(stringParser).I_I(WS().e_I(S(":").e_I(WS()).e_I(rec))) + .RepSep(WS().e_I(S(","))).M((s: seq<(string, JSON)>) => Object(s)) + .I_e(WS()).I_e(S("}"))) + + const parseProgram: B := + Rec((rec: B) => + O([ + nullParser, + boolParser, + stringJSONParser, + numberJSONParser, + arrayParser(rec), + objectParser(rec) + ])).I_e(End()) + + method {:test} TestParser() { + var source := @"{""a"": null, ""b"": [1.42, 25.150]}"; + ParseTest(parseProgram, source); + ParseTest(parseProgram, "[ ]"); + source := @"[true, false, null]"; + ParseTest(parseProgram, source); + } +} \ No newline at end of file From c6a57a8b4c2efd1e9c0ec388fed4df45d8f88319 Mon Sep 17 00:00:00 2001 From: Mikael Mayer Date: Fri, 17 Nov 2023 11:19:00 -0600 Subject: [PATCH 17/22] Added If to the builders --- src/Parsers/ParsersBuilders.dfy | 4 ++++ src/Parsers/ParsersTests.dfy | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/Parsers/ParsersBuilders.dfy b/src/Parsers/ParsersBuilders.dfy index 65548834..6b44d31d 100644 --- a/src/Parsers/ParsersBuilders.dfy +++ b/src/Parsers/ParsersBuilders.dfy @@ -27,6 +27,7 @@ abstract module ParserBuilders { provides B.I_e provides B.I_I provides B.M + provides B.If provides B.? provides B.?? provides B.Bind @@ -68,6 +69,9 @@ abstract module ParserBuilders { { B(P.Concat(apply, other.apply)) } + function If(thn: B): (p: B) { + B(P.If(apply, thn.apply)) + } function M(mappingFunc: R -> U): (p: B) // Maps the result { diff --git a/src/Parsers/ParsersTests.dfy b/src/Parsers/ParsersTests.dfy index dc6d51f4..2735d589 100644 --- a/src/Parsers/ParsersTests.dfy +++ b/src/Parsers/ParsersTests.dfy @@ -180,7 +180,7 @@ abstract module ParserTests refines Parsers { input: seq ) { - var result := RepSeq(underlying)(input); + var result := ZeroOrMore(underlying)(input); && result.Success? && |acc| <= |result.result| && (underlying(input).Success? && |underlying(input).remaining| < |input| @@ -229,7 +229,7 @@ abstract module ParserTests refines Parsers { lemma EpsilonValid() ensures Valid(Epsilon()) - { reveal Valid(), Epsilon(); Succeed_Valid(()); } + { reveal Valid(), Epsilon(); SucceedValid(()); } lemma AboutEpsilon(input: seq) ensures @@ -287,7 +287,7 @@ abstract module ParserTests refines Parsers { ensures Valid(BindSucceeds(left, right)) { - BindValid(left, right); + BindSucceedsValid(left, right); } } From be0d7968dbfe80880fb63a41b1d8611faa6aa598 Mon Sep 17 00:00:00 2001 From: Mikael Mayer Date: Fri, 17 Nov 2023 11:38:58 -0600 Subject: [PATCH 18/22] Updated readme --- src/Parsers/README.md | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/Parsers/README.md b/src/Parsers/README.md index 01ed0549..9e0b6a96 100644 --- a/src/Parsers/README.md +++ b/src/Parsers/README.md @@ -89,4 +89,21 @@ abstract module ParsersBuilders { module StringParsersBuilders { import P = StringParsers } -``` \ No newline at end of file +``` + + +## FAQ: + +### What properties can we use it to prove? + +* You get for free that parsers terminate, at worst with a run-time "fatal" parser result "no progress and got back to the same function" +* You can actually prove the absence of fatal errors. I have several lemmas that propagate this property through parsers combinators. I'm working on a lemma for the recursive case (already done in a previous formalism, need to migrate) +* You can prove the equivalence between various combinations of parser combinators (e.g. Bind/Succed == Map) +* You can use these parser combinators as a specification for optimized methods that perform the same task but perhaps with a different representation, e.g. array and position instead of sequences of characters. + +### How does it backtrack? Like Parsec (fails if tokens are consumed)? + +There are several way parsers can backtrack in the current design. +* A parser not consuming any input when returning a recoverable error can be ignored for combinators with alternatives like `Or`, `Maybe`, `If` or `ZeroOrMore` (respectively `O([...])`, `.?()`, `.If()` and `.ZeroOrMore()` if using builders) +* It's possible to transform a parser to not consume any input when it fails (except fatal errors) via the combinator `?(...)` (`.??()` if using builders). This means the failure will have the same input as previously given, making it possible for surrounding combinators to explore alternatives. +* The combinators `BindResult`, a generalization of the `Bind` combinator when considering parsers as monads, lets the user decides whether to continue on the left parser's remaining input or start from the beginning. \ No newline at end of file From f5b261d7c91d49a187d37849ce8c4728264f5594 Mon Sep 17 00:00:00 2001 From: Mikael Mayer Date: Wed, 13 Dec 2023 09:56:42 -0600 Subject: [PATCH 19/22] Parser generators I --- src/Parsers/Parsers.dfy | 27 ++++++++++ src/Parsers/ParsersBuilders.dfy | 13 +++++ src/Parsers/StringParsersBuilders.dfy | 10 +++- src/Parsers/StringParsersTests.dfy | 32 ++++++++++++ src/Parsers/examples/ParserGenerator.dfy | 64 ++++++++++++++++++++++++ 5 files changed, 145 insertions(+), 1 deletion(-) create mode 100644 src/Parsers/StringParsersTests.dfy create mode 100644 src/Parsers/examples/ParserGenerator.dfy diff --git a/src/Parsers/Parsers.dfy b/src/Parsers/Parsers.dfy index 9139cace..e1d4ea59 100644 --- a/src/Parsers/Parsers.dfy +++ b/src/Parsers/Parsers.dfy @@ -32,6 +32,8 @@ abstract module Parsers ConcatR, Rep, RepSep, + RepMerge, + RepSepMerge, CharTest, ZeroOrMore, OneOrMore, @@ -473,6 +475,31 @@ abstract module Parsers Rep(ConcatR(separator, underlying), (acc: seq, a: A) => acc + [a], [result.value])) } + opaque function RepMerge( + underlying: Parser, + merger: (A, A) -> A + ): Parser + // Repeats the underlying parser interleaved with a separator + // Returns a sequence of results + { + Bind(Maybe(underlying), (result: Option) => + if result.None? then Fail("No first element in RepMerge", Recoverable) else + Rep(underlying, (acc: A, a: A) => merger(acc, a), result.value)) + } + + opaque function RepSepMerge( + underlying: Parser, + separator: Parser, + merger: (A, A) -> A + ): Parser + // Repeats the underlying parser interleaved with a separator + // Returns a sequence of results + { + Bind(Maybe(underlying), (result: Option) => + if result.None? then Fail("No first element in RepSepMerge", Recoverable) else + Rep(ConcatR(separator, underlying), (acc: A, a: A) => merger(acc, a), result.value)) + } + opaque function {:tailrecursion true} Rep_( underlying: Parser, combine: (A, B) -> A, diff --git a/src/Parsers/ParsersBuilders.dfy b/src/Parsers/ParsersBuilders.dfy index 6b44d31d..7fefd67b 100644 --- a/src/Parsers/ParsersBuilders.dfy +++ b/src/Parsers/ParsersBuilders.dfy @@ -33,6 +33,8 @@ abstract module ParserBuilders { provides B.Bind provides B.Rep provides B.RepSep + provides B.RepMerge + provides B.RepSepMerge provides B.ZeroOrMore provides B.OneOrMore provides End @@ -92,6 +94,17 @@ abstract module ParserBuilders { B(P.RepSep(apply, separator.apply)) } + + function RepMerge(merger: (R, R) -> R): (p: B) + { + B(P.RepMerge(apply, merger)) + } + + function RepSepMerge(separator: B, merger: (R, R) -> R): (p: B) + { + B(P.RepSepMerge(apply, separator.apply, merger)) + } + function ZeroOrMore(): (p: B>) { B(P.ZeroOrMore(apply)) diff --git a/src/Parsers/StringParsersBuilders.dfy b/src/Parsers/StringParsersBuilders.dfy index 4addec7b..df02147b 100644 --- a/src/Parsers/StringParsersBuilders.dfy +++ b/src/Parsers/StringParsersBuilders.dfy @@ -4,7 +4,7 @@ include "ParsersBuilders.dfy" module StringParsersBuilders refines ParserBuilders { import P = StringParsers export StringParsersBuilders extends ParserBuilders - provides S, Int, WS, Except, ParseTest, Digit, DigitNumber + provides S, Int, WS, Except, ParseTest, Digit, DigitNumber, ParseTestCallback function S(s: string): B { B(P.String(s)) @@ -24,6 +24,14 @@ module StringParsersBuilders refines ParserBuilders { function Except(s: string): B { B(P.ZeroOrMore(P.CharTest((c: char) => c !in s, s))) } + method ParseTestCallback(p: B, input: string, printer: T -> string) { + var result := p.apply(input); + if result.Failure? { + P.PrintFailure(input, result); + } else { + print printer(result.result), "\n"; + } + } method ParseTest(p: B, input: string) { var result := p.apply(input); if result.Failure? { diff --git a/src/Parsers/StringParsersTests.dfy b/src/Parsers/StringParsersTests.dfy new file mode 100644 index 00000000..6d7ef536 --- /dev/null +++ b/src/Parsers/StringParsersTests.dfy @@ -0,0 +1,32 @@ +include "StringParsers.dfy" + +abstract module StringParserTests refines StringParsers { + datatype ParserResultArray = + ASuccess(result: string, pos: nat) + | AFailure(pos: nat) + + method ParseString(expected: string, input: array, i: nat) + returns (result: ParserResultArray) + requires i <= input.Length + ensures result.ASuccess? <==> String(expected)(input[..][i..]).Success? + ensures if result.ASuccess? then + result.result == String(expected)(input[..][i..]).result + else + result.pos == i + |input[..]|-|String(expected)(input[..][i..]).Remaining()| + { + reveal String(); + if i + |expected| <= input.Length && input[i..i+|expected|] == expected { + result := ASuccess(expected, i+|expected|); + } else { + result := AFailure(i); + assert String(expected)(input[..][i..]).Remaining() == input[..]; + } + } + method OptimizedSplit(input: string) returns (result: seq) + ensures Success(result, "") == + ConcatL(RepSep(ZeroOrMore(CharTest((c: char) => c != ',', "noncomma")), String(",")), + EndOfString())(input) + { +// var input + } +} diff --git a/src/Parsers/examples/ParserGenerator.dfy b/src/Parsers/examples/ParserGenerator.dfy new file mode 100644 index 00000000..dbe70e36 --- /dev/null +++ b/src/Parsers/examples/ParserGenerator.dfy @@ -0,0 +1,64 @@ +// RUN: %test "%s" + +include "../stringParsersBuilders.dfy" + +// A small regex-like language that can be turned into a straightforward parser +// a compiler from this language to an imperative language +// and a parser that can act +// and a parser that can act as a specification +module ParserGenerator { + import opened StringParsersBuilders + + type Option = StringParsersBuilders.P.Option + + function ToBool(): T -> bool { + t => true + } + + datatype ParserSpec = + | Const(s: string) + | And(left: ParserSpec, right: ParserSpec) + | Or(left: ParserSpec, right: ParserSpec) + | Repeat(p: ParserSpec) + { + function ToParser(): B { + match this + case Const(s) => S(s).M(ToBool()) + case And(left, right) => left.ToParser().e_I(right.ToParser()).M(ToBool()) + case Or(left, right) => O([left.ToParser().??(), right.ToParser()]).M(ToBool()) + case Repeat(x) => x.ToParser().ZeroOrMore().M(ToBool()) + } + function ToString(): string { + match this + case Const(s) => s + case And(left, right) => left.ToString() + right.ToString() + case Or(left, right) => "(" + left.ToString() + "|" + right.ToString() + ")" + case Repeat(underlying) => + var u := underlying.ToString(); + if |u| == 0 then "" else + if u[0..1] == "(" then u + "*" + else "(" + u + ")*" + } + } + + const parseSpec: B := + RecMap(map[ + "atom" := RecMapDef(0, (c: RecMapSel) => + O([ + S("(").e_I(c("or")).I_e(S(")")).Bind((atom: ParserSpec) => + S("*").?().M((star: Option) => + if star.None? then atom else Repeat(atom)) + ), + Except("()|").M((r: string) => ParserSpec.Const(r)) + ])), + "and" := RecMapDef(1, (c: RecMapSel) => + c("atom").RepMerge((atom1: ParserSpec, atom2: ParserSpec) => And(atom1, atom2))), + "or" := RecMapDef(2, (c: RecMapSel) => + c("and").RepSepMerge(S("|"), (and1: ParserSpec, and2: ParserSpec) => Or(and1, and2))) + ], "or") + + method {:test} TestParser() { + var program := "abc((de|f((g))*))ml"; + ParseTestCallback(parseSpec, program, (result: ParserSpec) => result.ToString()); + } +} \ No newline at end of file From 738b4abc08d77d620054a78a4c1226b486659970 Mon Sep 17 00:00:00 2001 From: Mikael Mayer Date: Wed, 13 Dec 2023 09:58:58 -0600 Subject: [PATCH 20/22] Double parser --- src/Parsers/examples/ParserGenerator.dfy | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Parsers/examples/ParserGenerator.dfy b/src/Parsers/examples/ParserGenerator.dfy index dbe70e36..ad47c84c 100644 --- a/src/Parsers/examples/ParserGenerator.dfy +++ b/src/Parsers/examples/ParserGenerator.dfy @@ -60,5 +60,10 @@ module ParserGenerator { method {:test} TestParser() { var program := "abc((de|f((g))*))ml"; ParseTestCallback(parseSpec, program, (result: ParserSpec) => result.ToString()); + var parser := parseSpec.apply(program); + expect parser.Success?; + var underlying := parser.result.ToParser(); + program := "abcdeml"; + print underlying.apply(program); // Should print true } } \ No newline at end of file From a48a4ac34125f95fd0056bf7201fc1514676ed40 Mon Sep 17 00:00:00 2001 From: Mikael Mayer Date: Wed, 13 Dec 2023 12:37:05 -0600 Subject: [PATCH 21/22] Advent of code --- src/Parsers/examples/AdventOfCode1.dfy | 36 ++++++++ src/Parsers/examples/ParserGenerator.dfy | 105 ++++++++++++++++++++++- 2 files changed, 137 insertions(+), 4 deletions(-) create mode 100644 src/Parsers/examples/AdventOfCode1.dfy diff --git a/src/Parsers/examples/AdventOfCode1.dfy b/src/Parsers/examples/AdventOfCode1.dfy new file mode 100644 index 00000000..6ba6c3ed --- /dev/null +++ b/src/Parsers/examples/AdventOfCode1.dfy @@ -0,0 +1,36 @@ +// RUN: %test "%s" + +include "../stringParsersBuilders.dfy" + +// A small regex-like language that can be turned into a straightforward parser +// So first we parse the parser to ParserSpec, we convert it to a parser +// and we parse the string using this parser. +// TODO: Compile this parser and prove it does the same. +module AdventOfCode1 { + import opened StringParsersBuilders + + const nonDigit := + Except("0123456789\r\n").ZeroOrMore() + + const digit := + B(P.DigitNumber()) + + const parseLine := + nonDigit.e_I(digit).Bind((first: nat) => + nonDigit.e_I(digit).??().Rep((first, first), + (pair: (nat, nat), newDigit: nat) => (pair.0, newDigit) + )).I_e(nonDigit) + + const parseInput := + parseLine.I_e(S("\r").?().e_I(S("\n").?())) + .Rep(0, (acc: int, newElem: (nat, nat)) => + acc + newElem.0 * 10 + newElem.1) + + method {:test} TestParser() { + var input := @"1abc2 +pqr3stu8vwx +a1b2c3d4e5f +treb7uchet"; + ParseTest(parseInput, input); + } +} \ No newline at end of file diff --git a/src/Parsers/examples/ParserGenerator.dfy b/src/Parsers/examples/ParserGenerator.dfy index ad47c84c..7c1c5411 100644 --- a/src/Parsers/examples/ParserGenerator.dfy +++ b/src/Parsers/examples/ParserGenerator.dfy @@ -3,9 +3,9 @@ include "../stringParsersBuilders.dfy" // A small regex-like language that can be turned into a straightforward parser -// a compiler from this language to an imperative language -// and a parser that can act -// and a parser that can act as a specification +// So first we parse the parser to ParserSpec, we convert it to a parser +// and we parse the string using this parser. +// TODO: Compile this parser and prove it does the same. module ParserGenerator { import opened StringParsersBuilders @@ -21,12 +21,19 @@ module ParserGenerator { | Or(left: ParserSpec, right: ParserSpec) | Repeat(p: ParserSpec) { + predicate OnlyAndRepeat() { + match this + case Const(s) => true + case And(left, right) => left.OnlyAndRepeat() && right.OnlyAndRepeat() + case Or(left, right) => false + case Repeat(p) => p.OnlyAndRepeat() + } function ToParser(): B { match this case Const(s) => S(s).M(ToBool()) case And(left, right) => left.ToParser().e_I(right.ToParser()).M(ToBool()) case Or(left, right) => O([left.ToParser().??(), right.ToParser()]).M(ToBool()) - case Repeat(x) => x.ToParser().ZeroOrMore().M(ToBool()) + case Repeat(x) => x.ToParser().??().ZeroOrMore().M(ToBool()) } function ToString(): string { match this @@ -66,4 +73,94 @@ module ParserGenerator { program := "abcdeml"; print underlying.apply(program); // Should print true } + + // TODO: Some kind of compilation? +/* + datatype ParserStmt = + Expect(c: char) + | Stmts(first: ParserStmt, next: PArserStmt) + | Repeat(underlying: ParserStmt) + | Break() + { + function ToProgram(indent: string := ""): string { + match this { + case Expect(c) => "if input[i] == '" + [c] + "' { }" + } + } + // (ok, cancelling) + function Run(input: string, index: nat): (bool, nat) { + if |input| <= index then (false, index) else + match this { + case Expect(c) => (input[index] == c, index + 1) + case Stmts(first) => + if s == [] then (true, index) + else + var (r, newIndex) := !s[0].Run(input, index); + if r then Stmts(s[1..]).Run(input, newIndex) + else (false, index) // We completely forget about the failure + case Break() => + case Repeat(stmts) => + stmts + } + } + + method RunImperative(input: string) returns (b: bool) + ensures b == Run(input, 0) + { + var i := + } + } + //datatype +*/ + // A ParserSpec can be compiled to this non-deterministic Automata + // We will prove that the two parsing strategies are equivalent + /*datatype Automata = Automata( + nStates: nat, + startState: nat, + transitions: set<(nat, char, nat)>, + finalState: set + ) + { + static function FromParserSpec(spec: ParserSpec): Automata { + match spec { + case Const("") => + Automata(1, 0, {}, {0}) + case Const(s) => + var a := FromParserSpec(Const(s[1..])); + var newStart := a.nStates; + Automata(a.nStates + 1, newStart, a.transitions + {(newStart, s[0], a.startState)}, a.finalState) + case Or(left, right) => + var l := FromParserSpec(left); + var r := FromParserSpec(right); + var offsetRight := (n: nat) => n + l.nStates; + var newStart := l.nStates + r.nStates + 1; + var rightTransitions := set rt <- r.transitions :: (offsetRight(rt.0), rt.1, offsetRight(rt.2)); + Automata(l.nStates + r.nStates + 1, + newStart, + l.transitions + rightTransitions + + + set firstLeftTransition <- l.transitions | + firstLeftTransition.0 == l.start + ) + case _ => Automata(0, 0, {}, {}) + } + } + + function Run(input: string, states: set, index: nat): set { + if index >= |input| then {} + else set newState: nat, s: nat | + 0 <= newState < nStates && 0 <= s < nStates && + (s, input[index], newState) in transitions + :: newState + } + predicate Accepts(input: string) { + Run(input, {startState}, 0) * finalState != {} + } + + lemma Equivalence(spec: ParserSpec, input: string) + ensures spec.ToParser().apply(input).Success? + <==> FromParserSpec(spec).Accepts(input) + { + + } + }*/ } \ No newline at end of file From 294be6112595d0c5c8517d5203e4521a8120be1c Mon Sep 17 00:00:00 2001 From: Mikael Mayer Date: Wed, 13 Dec 2023 12:38:35 -0600 Subject: [PATCH 22/22] Removed useless comment --- src/Parsers/examples/AdventOfCode1.dfy | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/Parsers/examples/AdventOfCode1.dfy b/src/Parsers/examples/AdventOfCode1.dfy index 6ba6c3ed..89aeb172 100644 --- a/src/Parsers/examples/AdventOfCode1.dfy +++ b/src/Parsers/examples/AdventOfCode1.dfy @@ -2,10 +2,6 @@ include "../stringParsersBuilders.dfy" -// A small regex-like language that can be turned into a straightforward parser -// So first we parse the parser to ParserSpec, we convert it to a parser -// and we parse the string using this parser. -// TODO: Compile this parser and prove it does the same. module AdventOfCode1 { import opened StringParsersBuilders