diff --git a/src/Parsers/Parsers.dfy b/src/Parsers/Parsers.dfy new file mode 100644 index 00000000..e1d4ea59 --- /dev/null +++ b/src/Parsers/Parsers.dfy @@ -0,0 +1,657 @@ +include "../Wrappers.dfy" + +abstract module Parsers +// Functional parsers consuming sequences seq from the left to the right. +// For parsers over strings, please refer to the StringParsers module +{ + import Wrappers + + export + provides C, // The character type + Wrappers, // Imported module + Valid, + Succeed, + Epsilon, + Fail, + EndOfString, + Bind, + BindSucceeds, + BindResult, + Map, + Not, + And, + Or, + OrSeq, + Lookahead, + ?, + If, + Maybe, + ConcatMap, + Concat, + ConcatL, + ConcatR, + Rep, + RepSep, + RepMerge, + RepSepMerge, + CharTest, + ZeroOrMore, + OneOrMore, + Recursive, + RecursiveMap, + Debug, + intToString, + digitToInt, + stringToInt, + ParseResult.IsFailure, + ParseResult.PropagateFailure, + ParseResult.Extract + reveals + Parser, + ParserSelector, + Option, // From Wrappers + FailureLevel, + ParseResult, + FailureData, + RecursiveDef + + export All reveals * + + type C(!new, ==) + // The character of the sequence being parsed + + type Parser<+R> = seq -> ParseResult + // A parser is a total function from a position to a parse result + // Because it returns a delta pos, it cannot return a position negative from the origing + // If the parsing is out of context, it will return a failure. + + type ParserSelector = string -> Parser + // A parser selector is a function that, given a name that exists, + // returns a parser associated to this name + + type Option = Wrappers.Option + // The common option type, synonym definition + + datatype FailureData = + FailureData( + message: string, + remaining: seq, + next: Option) + // A Parser failure can mention several places + // (e.g. which could have continued to parse) + { + function Concat(other: FailureData): FailureData + // Concatenates two failure datas, the first staying in the front + { + if next == Option.None then + this.(next := Option.Some(other)) + else + FailureData(message, remaining, Option.Some(next.value.Concat(other))) + } + } + + datatype FailureLevel = + // Failure level for parse results. + // A Fatal error results in a unique FailurePosition + // and will be propagated to the top ASAP + // A Recoverable error can typically be processed. + // Comittedness of the parser only depends if the .Remaining() + // of the parse result has moved since the input was provided. + Fatal | Recoverable + + datatype ParseResult<+R> = + // ParseResult is the type of what a parser taking a seq would return + | Failure(level: FailureLevel, data: FailureData) + // Returned if a parser failed. + | Success(result: R, remaining: seq) + // Returned if a parser succeeds, with the increment in the position + { + function Remaining(): seq + // If Remaining() is the same as the input, the parser is "uncommitted", + // which means combinators like Or and ZeroOrMore can try alternatives + { + if Success? then remaining else data.remaining + } + + predicate IsFailure() { + Failure? + } + + predicate IsFatalFailure() { + Failure? && level == Fatal + } + + predicate IsFatal() + requires IsFailure() + { + level == Fatal + } + + function PropagateFailure(): ParseResult + requires IsFailure() + { + Failure(level, data) + } + + function Extract(): (R, seq) + requires !IsFailure() + { + (result, remaining) + } + + function Map(f: R -> R'): ParseResult + // Transforms the result of a successful parse result + { + match this + case Success(result, remaining) => + Success(f(result), remaining) + case Failure(level, data) => + Failure(level, data) + } + + function MapRecoverableError( + f: FailureData -> FailureData + ): ParseResult + // If the result is a recoverable error, + // let the function process it + { + match this + case Failure(Recoverable, data) => + Failure(Recoverable, f(data)) + case _ => this + } + + predicate NeedsAlternative(input: seq) + // Returns true if the parser result is a + // - A failure + // - Is recoverable + // - Did not consume any input (not-committed) + { + Failure? && level == Recoverable && input == Remaining() + } + } + + predicate IsRemaining(input: seq, remaining: seq) + // True if remaining is a suffix of the input + { + && |remaining| <= |input| + && input[|input|-|remaining|..] == remaining + } + + opaque ghost predicate Valid(underlying: Parser) + // A parser is valid iff for any input, it never returns a fatal error + // and always returns a suffix of its input + { + forall input: seq :: + && (underlying(input).Failure? ==> underlying(input).level == Recoverable) + && IsRemaining(input, underlying(input).Remaining()) + } + + // ######################################## + // Parser combinators. + // The following functions make it possible to create and compose parsers + // All these combinators provide Valid() parsers if their inputs are Valid() too + // ######################################## + + opaque function Succeed(result: R): (p: Parser) + // A parser that does not consume any input and returns the given value + { + (input: seq) => Success(result, input) + } + + opaque function Epsilon(): (p: Parser<()>) + // A parser that always succeeds, consumes nothing and returns () + { + Succeed(()) + } + + opaque function Fail(message: string, level: FailureLevel := Recoverable): Parser + // A parser that does not consume any input and returns the given failure + { + (input: seq) => Failure(level, FailureData(message, input, Option.None)) + } + + opaque function EndOfString(): Parser<()> + // A parser that fails if the string has not been entirely consumed + { + (input: seq) => + if |input| == 0 then Success((), input) + else Failure(Recoverable, FailureData("expected end of string", input, Option.None)) + } + + opaque function Bind( + left: Parser, + right: L -> Parser + ) : (p: Parser) + // Fails if the left parser fails. + // If the left parser succeeds, provides its result and the remaining sequence + // to the right parser generator. + // For a more general version, look at BindSucceeds + { + (input: seq) + => + var (leftResult, remaining) :- left(input); + right(leftResult)(remaining) + } + + opaque function BindSucceeds( + left: Parser, + right: (L, seq) -> Parser + ) : (p: Parser) + // Fails if the left parser fails. + // If the left parser succeeds, provides its result and its remaining + // to the right parser generator and returns its result applied to the remaining + // For a more general version, look at BindResult + { + (input: seq) + => + var (leftResult, remaining) :- left(input); + right(leftResult, remaining)(remaining) + } + + opaque function BindResult( + left: Parser, + right: (ParseResult, seq) -> Parser + ) : (p: Parser) + // Given a left parser and a parser generator based on the output + // of the left parser, + // returns the result of the right parser applied on the original input + { + (input: seq) + => + right(left(input), input)(input) + } + + opaque function Map(underlying: Parser, mappingFunc: R -> U) + : (p: Parser) + // A parser combinator that makes it possible to transform the result of a parser in another one + // The mapping function can be partial + // ensures forall pos | MapSpec(size, underlying, mappingFunc, pos) :: + // p.requires(pos) + { + (input: seq) => + var (result, remaining) :- underlying(input); + var u := mappingFunc(result); + Success(u, remaining) + } + + opaque function Not(underlying: Parser): Parser<()> + // Returns a parser that succeeds if the underlying parser fails + // and vice-versa. The result does not consume any input + { + (input: seq) => + var l := underlying(input); + if l.IsFailure() then + if l.IsFatal() then l.PropagateFailure() + else Success((), input) + else Failure(Recoverable, FailureData("not failed", input, Option.None)) + } + + opaque function And( + left: Parser, + right: Parser + ) : (p: Parser<(L, R)>) + // Make the two parsers parse the same string and, if both suceed, + // returns a pair of the two results, with the remaining of the right + { + (input: seq) => + var (l, remainingLeft) :- left(input); + var (r, remainingRight) :- right(input); + Success((l, r), remainingRight) + } + + opaque function Or( + left: Parser, + right: Parser + ) : (p: Parser) + // left parses the string. If left succeeds, returns + // if left fails, two cases + // - If the error is recoverable and the parser did not consume input, + // then return what right returns + // - Otherwise return both errors + { + (input: seq) => + var p := left(input); + if !p.NeedsAlternative(input) then p else + var p2 := right(input); + if !p2.NeedsAlternative(input) then p2 else + p2.MapRecoverableError( + dataRight => + p.data.Concat(dataRight)) + } + + opaque function OrSeq( + alternatives: seq> + ): Parser + { + if |alternatives| == 0 then Fail("no alternatives") else + if |alternatives| == 1 then alternatives[0] + else + Or(alternatives[0], OrSeq(alternatives[1..])) + } + + opaque function Lookahead(underlying: Parser): (p: Parser) + // If the underlying parser succeeds, + // returns its result without committing the input + // if the underlying parser fails, + // - If the failure is fatal, returns it as-it + // - If the failure is recoverable, returns it without comitting the input + { + (input: seq) => + var p := underlying(input); + if p.IsFailure() then + if p.IsFatal() then + p + else + p.(data := FailureData(p.data.message, input, Option.None)) + else + p.(remaining := input) + } + + opaque function ?(underlying: Parser): (p: Parser) + // Like Lookahead, except that if the parser succeeds, + // it keeps the committedness of the input. + // Identical to Lookahead, if the underlying parser fails, + // - If the failure is fatal, returns it as-it + // - If the failure is recoverable, returns it without comitting the input + { + (input: seq) => + var p := underlying(input); + if p.IsFailure() then + if p.IsFatal() then + p + else + p.(data := FailureData(p.data.message, input, Option.None)) + else + p + } + + opaque function If( + condition: Parser, + succeed: Parser + ) : (p: Parser) + // If the condifition fails, returns a non-committing failure + // Suitable to use in Or parsers + { + Bind(Lookahead(condition), (l: L) => succeed) + } + + opaque function Maybe(underlying: Parser): Parser> + // Transforms a recoverable failure into None, + // and wraps a success into Some(...) + { + (input: seq) => + var u := underlying(input); + if u.IsFatalFailure() then u.PropagateFailure() + else + if u.Success? then u.Map(result => Option.Some(result)) + else Success(Option.None, input) + } + + opaque function ConcatMap( + left: Parser, + right: Parser, + mapper: (L, R) -> T + ) : (p: Parser) + // Apply two consecutive parsers consecutively + // If both succeed, apply the mapper to the result and return it + { + (input: seq) + => + var (l, remaining) :- left(input); + var (r, remaining2) :- right(remaining); + Success(mapper(l, r), remaining2) + } + + opaque function Concat( + left: Parser, + right: Parser + ) : (p: Parser<(L, R)>) + // Apply two consecutive parsers consecutively + // If both succeed, return the pair of the two results + { + (input: seq) => + var (l, remaining) :- left(input); + var (r, remaining2) :- right(remaining); + Success((l, r), remaining2) + } + + opaque function ConcatR( + left: Parser, + right: Parser + ) : (p: Parser) + // Return only the result of the right parser if the two parsers match + { + ConcatMap(left, right, (l, r) => r) + } + + opaque function ConcatL( + left: Parser, + right: Parser + ) : (p: Parser) + // Return only the result of the right parser if the two parsers match + { + ConcatMap(left, right, (l, r) => l) + } + + opaque function ZeroOrMore( + underlying: Parser + ): Parser> + // Repeats the underlying parser until the first failure + // that accepts alternatives, and returns the underlying sequence + { + Rep(underlying, (result: seq, r: R) => result + [r], []) + } + + opaque function OneOrMore(underlying: Parser): (p: Parser>) + // Repeats the underlying parser until the first failure + // Will return a failure if there is not at least one match + { + Bind(underlying, (r: R) => + Rep(underlying, (s: seq, r': R) => s + [r'], [r])) + } + + opaque function Rep( + underlying: Parser, + combine: (A, B) -> A, + acc: A + ): Parser + // Repeats the underlying parser until the first failure + // that accepts alternatives, combining results to an accumulator + // and return the final accumulator + { + (input: seq) => Rep_(underlying, combine, acc, input) + } + + opaque function RepSep( + underlying: Parser, + separator: Parser + ): Parser> + // Repeats the underlying parser interleaved with a separator + // Returns a sequence of results + { + Bind(Maybe(underlying), (result: Option) => + if result.None? then Succeed>([]) else + Rep(ConcatR(separator, underlying), (acc: seq, a: A) => acc + [a], [result.value])) + } + + opaque function RepMerge( + underlying: Parser, + merger: (A, A) -> A + ): Parser + // Repeats the underlying parser interleaved with a separator + // Returns a sequence of results + { + Bind(Maybe(underlying), (result: Option) => + if result.None? then Fail("No first element in RepMerge", Recoverable) else + Rep(underlying, (acc: A, a: A) => merger(acc, a), result.value)) + } + + opaque function RepSepMerge( + underlying: Parser, + separator: Parser, + merger: (A, A) -> A + ): Parser + // Repeats the underlying parser interleaved with a separator + // Returns a sequence of results + { + Bind(Maybe(underlying), (result: Option) => + if result.None? then Fail("No first element in RepSepMerge", Recoverable) else + Rep(ConcatR(separator, underlying), (acc: A, a: A) => merger(acc, a), result.value)) + } + + opaque function {:tailrecursion true} Rep_( + underlying: Parser, + combine: (A, B) -> A, + acc: A, + input: seq + ): (p: ParseResult) + decreases |input| + // ZeroOrMore the underlying parser over the input until a recoverable failure happens + // and returns the accumulated results + { + match underlying(input) + case Success(result, remaining) => + if |remaining| >= |input| then Success(acc, input) else + Rep_(underlying, combine, combine(acc, result), remaining) + case failure => + if failure.NeedsAlternative(input) then + Success(acc, input) + else + failure.PropagateFailure() + } + + opaque function Recursive( + underlying: Parser -> Parser + ): (p: Parser) + // Given a function that requires a parser to return a parser, + // provide the result of this parser to that function itself. + // Careful: This function is not tail-recursive and will consume stack. + // Prefer using Rep() or ZeroOrMore() for sequences + { + (input: seq) => Recursive_(underlying, input) + } + + opaque function Recursive_( + underlying: Parser -> Parser, + input: seq + ): (p: ParseResult) + // Implementation for Recursive() + decreases |input| + { + var callback: Parser := + (remaining: seq) => + if |remaining| < |input| then + Recursive_(underlying, remaining) + else if |remaining| == |input| then + Failure(Recoverable, FailureData("no progress", remaining, Option.None)) + else + Failure(Fatal, FailureData("fixpoint called with an increasing remaining sequence", remaining, Option.None)); + underlying(callback)(input) + } + + opaque function RecursiveMap( + underlying: map>, + fun: string): (p: Parser) + // Given a map of name := recursive definitions, + // provide the result of this parser to the recursive definitions + // and set 'fun' as the initial parser. + // Careful: This function is not tail-recursive and will consume stack + { + (input: seq) => RecursiveMap_(underlying, fun, input) + } + + datatype RecursiveDef = RecursiveDef( + order: nat, + definition: ParserSelector -> Parser + ) // The order must be decreasing every time the function steps in + // But it can jump to a bigger order if the input is consumed + + opaque function RecursiveMap_( + underlying: map>, + fun: string, + input: seq + ): (p: ParseResult) + // Implementation for RecursiveMap() + decreases |input|, if fun in underlying then underlying[fun].order else 0 + { + if fun !in underlying then Failure(Fatal, FailureData("parser '"+fun+"' not found", input, Option.None)) else + var RecursiveDef(orderFun, definitionFun) := underlying[fun]; + var callback: ParserSelector + := + (fun': string) => + (var p : Parser := + if fun' !in underlying.Keys then + Fail(fun' + " not defined", Fatal) + else + var RecursiveDef(orderFun', definitionFun') := underlying[fun']; + (remaining: seq) => + if |remaining| < |input| || (|remaining| == |input| && orderFun' < orderFun) then + RecursiveMap_(underlying, fun', remaining) + else if |remaining| == |input| then + Failure(Recoverable, FailureData("non-progressing recursive call requires that order of '" + +fun'+"' ("+intToString(orderFun')+") is lower than the order of '"+fun+"' ("+intToString(orderFun)+")", remaining, Option.None)) + else + Failure(Fatal, FailureData("parser did not return a suffix of the input", remaining, Option.None)) + ; p); + definitionFun(callback)(input) + } + + opaque function CharTest(test: C -> bool, name: string): (p: Parser) + // A parser that returns the current char if it passes the test + // Returns a recoverable error based on the name otherwise + { + (input: seq) => + if 0 < |input| && test(input[0]) then Success(input[0], input[1..]) + else Failure(Recoverable, + FailureData("expected a "+name, input, Option.None)) + } + + function Debug_(message: string): string { + message + } by method { + print message, "\n"; + return message; + } + + opaque function Debug(msg: string, other: Parser): (p: Parser) + // A parser that, when invoked, will print a message before applying its underlying parser + // and also afterwards + { + (input: seq) => + var _ := Debug_(msg + "(before)"); + var p := other(input); + var _ := Debug_(msg + "(after)"); + p + } + + opaque function intToString(n: int): string + // Converts an integer to a string + decreases if n < 0 then 1 - n else n + { + if n < 0 then "-" + intToString(-n) else + match n + case 0 => "0" case 1 => "1" case 2 => "2" case 3 => "3" case 4 => "4" + case 5 => "5" case 6 => "6" case 7 => "7" case 8 => "8" case 9 => "9" + case _ => intToString(n / 10) + intToString(n % 10) + } + + opaque function digitToInt(c: char): int { + match c + case '0' => 0 case '1' => 1 case '2' => 2 case '3' => 3 case '4' => 4 + case '5' => 5 case '6' => 6 case '7' => 7 case '8' => 8 case '9' => 9 + case _ => -1 + } + + opaque function stringToInt(s: string): int + // Converts a string to a string + decreases |s| + { + if |s| == 0 then 0 else + if |s| == 1 then digitToInt(s[0]) + else if s[0] == '-' then + 0 - stringToInt(s[1..]) + else + stringToInt(s[0..|s|-1])*10 + stringToInt(s[|s|-1..|s|]) + } +} \ No newline at end of file diff --git a/src/Parsers/ParsersBuilders.dfy b/src/Parsers/ParsersBuilders.dfy new file mode 100644 index 00000000..7fefd67b --- /dev/null +++ b/src/Parsers/ParsersBuilders.dfy @@ -0,0 +1,176 @@ +include "Parsers.dfy" + +// Nice wanna-to-be DSL to build parsers to avoid too much parenthesis nesting +// B(p) returns a parser builder from a normal parser. +// B1.o_I(B2) will parse both but return the result of B2 +// B1.I_o(B2) will parse both but return the result of B1 +// B.M(f) will map the result of the parser builder by f if succeeded +// B1.O(B2) will either parse B1, or B2 if B1 fails with Recoverable +// FirstOf([B1, B2, B3]) +// will parse with B1, but if B1 fails with Recoverable, +// it will parse with B2, but if B2 fails with Recoverable, +// it will parse with B3 +// R(v) returns a parser builder that returns immediately v +// +// There are more parser builders in the trait Engine, when their spec depends on +// a predetermined input, e.g. to tests for constant strings + +abstract module ParserBuilders { + import P: Parsers + export + provides P + provides O + provides Ok + provides Fail + provides Rec + provides B.e_I + provides B.I_e + provides B.I_I + provides B.M + provides B.If + provides B.? + provides B.?? + provides B.Bind + provides B.Rep + provides B.RepSep + provides B.RepMerge + provides B.RepSepMerge + provides B.ZeroOrMore + provides B.OneOrMore + provides End + reveals CharTest + reveals B + reveals Rec, RecMap + reveals RecMapDef, FailureLevel, RecMapSel + + type FailureLevel = P.FailureLevel + type RecMapSel = string -> B + + // Wrap the constructor in a class where the size is constant so that users + // don'result need to provide it. + datatype B = B(apply: P.Parser) + { + function ?(): B> { + B(P.Maybe(apply)) + } + function ??(): B { + B(P.?(apply)) + } + function e_I(other: B): (p: B) + // Excludes the left, includes the right + { + B(P.ConcatR(apply, other.apply)) + } + function I_e(other: B): (p: B) + // Includes the left, excludes the right + { + B(P.ConcatL(apply, other.apply)) + } + function I_I(other: B): (p: B<(R, U)>) + // Includes the left, excludes the right + { + B(P.Concat(apply, other.apply)) + } + function If(thn: B): (p: B) { + B(P.If(apply, thn.apply)) + } + function M(mappingFunc: R -> U): (p: B) + // Maps the result + { + B(P.Map(apply, mappingFunc)) + } + function Bind(other: R -> B): (p: B) + { + B(P.Bind(apply, (result: R) => other(result).apply)) + } + + function Rep(init: A, combine: (A, R) -> A): (p: B) + { + B(P.Rep(apply, combine, init)) + } + + function RepSep(separator: B): (p: B>) + { + B(P.RepSep(apply, separator.apply)) + } + + + function RepMerge(merger: (R, R) -> R): (p: B) + { + B(P.RepMerge(apply, merger)) + } + + function RepSepMerge(separator: B, merger: (R, R) -> R): (p: B) + { + B(P.RepSepMerge(apply, separator.apply, merger)) + } + + function ZeroOrMore(): (p: B>) + { + B(P.ZeroOrMore(apply)) + } + + function OneOrMore(): (p: B>) + { + B(P.OneOrMore(apply)) + } + } + + function Ok(result: R): (p: B) + { + B(P.Succeed(result)) + } + + function Fail(message: string, level: FailureLevel := FailureLevel.Recoverable): (p: B) + { + B(P.Fail(message, level)) + } + + function O(alternatives: seq>): B + // Declares a set of alternatives as a single list + { + if |alternatives| == 0 then Fail("no alternative") else + if |alternatives| == 1 then alternatives[0] + else + B(P.Or(alternatives[0].apply, O(alternatives[1..]).apply)) + } + + function End(): B<()> + { + B(P.EndOfString()) + } + + function CharTest(test: P.C -> bool, name: string): B + { + B(P.CharTest(test, name)) + } + + opaque function Rec( + underlying: B -> B + ): B + { + B(P.Recursive((p: P.Parser) => + underlying(B(p)).apply)) + } + + datatype RecMapDef = RecMapDef( + order: nat, + definition: RecMapSel -> B) + + opaque function RecMap( + underlying: map>, + fun: string): (p: B) + { + B(P.RecursiveMap( + map k <- underlying :: k := + P.RecursiveDef( + underlying[k].order, + (selector: P.ParserSelector) => + underlying[k].definition( + (name: string) => + B(selector(name)) + ).apply), + fun + )) + } +} diff --git a/src/Parsers/ParsersDisplayers.dfy b/src/Parsers/ParsersDisplayers.dfy new file mode 100644 index 00000000..90b71a3c --- /dev/null +++ b/src/Parsers/ParsersDisplayers.dfy @@ -0,0 +1,46 @@ +include "Parsers.dfy" + +// From these parsers, we can create displayers +// and prove the roundtrip displayer / parser if we wanted to +abstract module ParsersDiplayers { + import Parsers`All + + type Parser = Parsers.Parser + type C = Parsers.C + + type Displayer<-R> = (R, seq) -> seq + + function Concat( + left: Displayer, + right: Displayer + ): Displayer<(A, B)> { + (ab: (A, B), remaining: seq) => + var remaining2 := right(ab.1, remaining); + var remaining3 := left(ab.0, remaining2); + remaining3 + } + + ghost predicate Roundtrip(parse: Parser, display: Displayer) + // The parser and the displayer are dual to each other + // means that if we parse after printing, we get the same result + { + forall a: A, remaining: seq :: + parse(display(a, remaining)) == Parsers.Success(a, remaining) + } + + lemma {:rlimit 1000} ConcatRoundtrip( + pA: Parser, ppA: Displayer, + pB: Parser, ppB: Displayer + ) + requires Roundtrip(pA, ppA) && Roundtrip(pB, ppB) + ensures Roundtrip(Parsers.Concat(pA, pB), Concat(ppA, ppB)) + { + reveal Parsers.Concat(); + var p := Parsers.Concat(pA, pB); + var d := Concat(ppA, ppB); + forall ab: (A, B), remaining: seq ensures + p(d(ab, remaining)) == Parsers.Success(ab, remaining) + { + } + } +} \ No newline at end of file diff --git a/src/Parsers/ParsersTests.dfy b/src/Parsers/ParsersTests.dfy new file mode 100644 index 00000000..2735d589 --- /dev/null +++ b/src/Parsers/ParsersTests.dfy @@ -0,0 +1,410 @@ +include "Parsers.dfy" + +abstract module ParserTests refines Parsers { + lemma AboutSucceed(result: R, input: seq) + ensures + var p := Succeed(result); + && p(input).Success? + && p(input).remaining == input + { reveal Succeed(); } + + lemma AboutFail_(message: string, level: FailureLevel, input: seq) + ensures + var p := Fail(message, level)(input); + && p.Failure? + && p.data == FailureData(message, input, Option.None) + && p.level == level + { + reveal Fail(); + } + + lemma AboutFail_2(message: string, input: seq) + ensures + var p := Fail(message)(input); + && p.Failure? + && p.level == Recoverable + && p.data == FailureData(message, input, Option.None) + { + reveal Fail(); + } + + lemma AboutBind_( + left: Parser, + right: (L, seq) -> Parser, + input: seq + ) + ensures + var p := BindSucceeds(left, right)(input); + && var leftResult := left(input); + && !leftResult.IsFailure() + ==> var leftValues := left(input).Extract(); + && var rightResult := right(leftValues.0, leftValues.1)(leftValues.1); + && !rightResult.IsFailure() + ==> && !p.IsFailure() + && p.remaining == rightResult.remaining + && p.result == rightResult.result + { + reveal BindSucceeds(); + } + + lemma AboutMap_(underlying: Parser, mappingFunc: R -> U, input: seq) + ensures var p := Map(underlying, mappingFunc); + && (underlying(input).Success? <==> p(input).Success?) + && (p(input).Success? ==> + && p(input).remaining == underlying(input).remaining + && p(input).result == mappingFunc(underlying(input).result)) + { + reveal Map(); + reveal BindSucceeds(); + reveal Succeed(); + } + + function BindMapCallback(mappingFunc: R -> U): + (R, seq) -> Parser + { + (result: R, remaining: seq) => Succeed(mappingFunc(result)) + } + + lemma AboutMap_Bind_(underlying: Parser, mappingFunc: R -> U, input: seq) + ensures Map(underlying, mappingFunc)(input) + == BindSucceeds(underlying, BindMapCallback(mappingFunc))(input) + { + reveal Map(); + reveal BindSucceeds(); + reveal Succeed(); + } + + lemma AboutConcat( + left: Parser, + right: Parser, + input: seq) + ensures var p := Concat(left, right); + && (p(input).Success? ==> + && left(input).Success? + && p(input).result.0 == left(input).result + && var input2 := left(input).remaining; + && right(input2).Success? + && p(input).result.1 == right(input2).result + && p(input).remaining == right(input2).remaining) + { + reveal Concat(); + reveal ConcatMap(); + } + + function BindConcatCallback(right: Parser): (L, seq) -> Parser<(L, R)> + { + (l: L, remaining: seq) => + Map(right, (r: R) => (l, r)) + } + + lemma AboutConcatBindSucceeds( + left: Parser, + right: Parser, + input: seq) + ensures Concat(left, right)(input) == BindSucceeds(left, BindConcatCallback(right))(input) + { + reveal Concat(); + reveal BindSucceeds(); + reveal Succeed(); + reveal Map(); + reveal ConcatMap(); + } + + lemma AboutConcatR( + left: Parser, + right: Parser, + input: seq) + ensures var p := ConcatR(left, right); + && (p(input).Success? ==> + && left(input).Success? + && var input2 := left(input).remaining; + && right(input2).Success? + && p(input).result == right(input2).result + && p(input).remaining == right(input2).remaining) + { + reveal ConcatR(); + reveal ConcatMap(); + } + + function first(): ((L, R)) -> L { + (lr: (L, R)) => lr.0 + } + function second(): ((L, R)) -> R { + (lr: (L, R)) => lr.1 + } + lemma AboutConcatConcatR( + left: Parser, + right: Parser, + input: seq) + ensures Map(Concat(left, right), second())(input) == ConcatR(left, right)(input) + { + reveal Concat(); + reveal Succeed(); + reveal ConcatR(); + reveal Map(); + reveal ConcatMap(); + } + + + lemma AboutConcatL( + left: Parser, + right: Parser, + input: seq) + ensures var p := ConcatL(left, right); + && (p(input).Success? ==> + && left(input).Success? + && var input2 := left(input).remaining; + && right(input2).Success? + && p(input).result == left(input).result + && p(input).remaining == right(input2).remaining) + { + reveal ConcatL(); + reveal ConcatMap(); + } + lemma AboutConcatConcatL( + left: Parser, + right: Parser, + input: seq) + ensures Map(Concat(left, right), first())(input) == ConcatL(left, right)(input) + { + reveal Concat(); + reveal Succeed(); + reveal ConcatL(); + reveal Map(); + reveal ConcatMap(); + } + + predicate AboutRepIncreasesPosIfUnderlyingSucceedsAtLeastOnceEnsures( + underlying: Parser, + acc: seq, + input: seq + ) + { + var result := ZeroOrMore(underlying)(input); + && result.Success? + && |acc| <= |result.result| + && (underlying(input).Success? && |underlying(input).remaining| < |input| + ==> + (|acc| < |result.result| && |result.remaining| < |input|)) + } + + predicate AboutFix_Ensures( + underlying: Parser -> Parser, + input: seq) + { + var p := Recursive_(underlying, input); + p.Success? ==> IsRemaining(input, p.remaining) + } + + lemma {:vcs_split_on_every_assert} AboutFix( + underlying: Parser -> Parser, + input: seq) + requires + forall callback: Parser, u: seq + | underlying(callback)(u).Success? + :: IsRemaining(input, underlying(callback)(input).Remaining()) + ensures AboutFix_Ensures(underlying, input) + { + reveal Recursive_(); + } + + + predicate AboutRecursiveMap_Ensures( + underlying: map>, + fun: string, + input: seq + ) { + var p := RecursiveMap_(underlying, fun, input); + && (p.Success? ==> IsRemaining(input, p.remaining)) + } + + + lemma SucceedValid(result: R) + ensures Valid(Succeed(result)) + { reveal Valid(), Succeed(); } + + lemma SucceedValidAuto() + ensures forall result: R :: Valid(Succeed(result)) + { reveal Valid(), Succeed(); } + + lemma EpsilonValid() + ensures Valid(Epsilon()) + { reveal Valid(), Epsilon(); SucceedValid(()); } + + lemma AboutEpsilon(input: seq) + ensures + var p := Epsilon(); + && p(input).Success? + && p(input).remaining == input + { + reveal Epsilon(); + reveal Succeed(); + } + + lemma FailValid(message: string) + ensures Valid(Fail(message, Recoverable)) + { reveal Fail(); reveal Valid(); } + + lemma FailValidAuto() + ensures forall message :: Valid(Fail(message, Recoverable)) + { reveal Fail(); reveal Valid(); } + + ghost predicate BindRightValid(right: (L, seq) -> Parser) { + forall l: L, input: seq :: Valid(right(l, input)) + } + + lemma BindSucceedsValid( + left: Parser, + right: (L, seq) -> Parser + ) + requires Valid(left) + requires BindRightValid(right) + ensures Valid(BindSucceeds(left, right)) + { + reveal BindSucceeds(), Valid(); + var p := BindSucceeds(left, right); + forall input: seq ensures + && (p(input).Failure? ==> p(input).level == Recoverable) + && IsRemaining(input, p(input).Remaining()) + { + + } + } + + ghost predicate BindValidRight(left: Parser) + requires Valid(left) + { + forall right: (L, seq) -> Parser | BindRightValid(right) :: + Valid(BindSucceeds(left, right)) + } + + lemma BindValidAuto() + ensures forall left: Parser | Valid(left) :: + BindValidRight(left) + { + forall left: Parser | Valid(left), + right: (L, seq) -> Parser | BindRightValid(right) + ensures + Valid(BindSucceeds(left, right)) + { + BindSucceedsValid(left, right); + } + } + + lemma intToStringThenStringToIntIdem(n: int) + decreases if n < 0 then 1 - n else n + ensures 0 <= n ==> 1 <= |intToString(n)| && intToString(n)[0] != '-' + ensures stringToInt(intToString(n)) == n + { + reveal intToString(), stringToInt(), digitToInt(); + if n < 0 { + calc { + stringToInt(intToString(n)); + stringToInt("-" + intToString(-n)); + 0 - stringToInt(intToString(-n)); + { intToStringThenStringToIntIdem(-n); } + n; + } + } else if 0 <= n <= 9 { + assert stringToInt(intToString(n)) == n; + } else { + assert intToString(n) == intToString(n / 10) + intToString(n % 10); + var s := intToString(n); + } + } + opaque predicate IsStringInt(s: string): (b: bool) + ensures b ==> |s| > 0 + { + |s| > 0 && + if s[0] == '-' then + |s| > 1 && s[1] != '0' && + (forall i | 1 <= i < |s| :: s[i] in "0123456789") + else + (|s| > 1 ==> s[0] != '0') && + (forall i | 0 <= i < |s| :: s[i] in "0123456789") + } + + lemma stringToIntNonnegative(s: string) + requires IsStringInt(s) + requires s[0] != '-' + decreases |s| + ensures 0 <= stringToInt(s) + ensures s != "0" ==> 0 < stringToInt(s) + ensures |s| > 1 ==> 10 <= stringToInt(s) + { + if |s| == 0 { + + } else if |s| == 1 { + reveal digitToInt(), stringToInt(), IsStringInt(); + match s[0] + case '0' => case '1' => case '2' => case '3' => case '4' => + case '5' => case '6' => case '7' => case '8' => case '9' => + case _ => + } else if s[0] == '-' { + } else { + assert !(|s| == 0 || |s| == 1 || s[0] == '-'); + reveal stringToInt(); + assert stringToInt(s) == stringToInt(s[0..|s|-1])*10 + stringToInt(s[|s|-1..|s|]); + assert IsStringInt(s[0..|s|-1]) by { + reveal IsStringInt(); + } + stringToIntNonnegative(s[..|s|-1]); + var tail := s[|s|-1..|s|]; + assert IsStringInt(tail) && tail[0] != '-' by { + reveal IsStringInt(); + } + stringToIntNonnegative(tail); + reveal IsStringInt(); + assert |s| > 1 ==> 10 <= stringToInt(s); + } + } + + lemma stringToIntThenIntToStringIdem(s: string) + requires IsStringInt(s) + decreases |s| + ensures s[0] != '-' ==> 0 <= stringToInt(s) + ensures |s| == 1 ==> 0 <= stringToInt(s) <= 9 + ensures intToString(stringToInt(s)) == s + { + assert |s| > 0; + if 1 <= |s| && s[0] == '-' { + reveal intToString(), stringToInt(), IsStringInt(); + assert forall i | 1 <= i < |s| :: s[i] in "0123456789"; + calc { + intToString(stringToInt(s)); + intToString(0 - stringToInt(s[1..])); + } + } else if |s| == 1 { + reveal intToString(), stringToInt(), IsStringInt(), digitToInt(); + calc { + intToString(stringToInt(s)); + s; + } + } else { + var n := stringToInt(s); + stringToIntNonnegative(s); + var init := s[..|s|-1]; + var last := s[|s|-1..|s|]; + var q := stringToInt(init); + var r := stringToInt(last); + assert IsStringInt(init) by { reveal IsStringInt(); } + assert IsStringInt(last) by { reveal IsStringInt(); } + stringToIntThenIntToStringIdem(init); + stringToIntThenIntToStringIdem(last); + assert stringToInt(s) == + stringToInt(s[0..|s|-1])*10 + stringToInt(s[|s|-1..|s|]) by { + reveal stringToInt(); + } + assert n == q * 10 + r; + calc { + intToString(n); + { reveal intToString(); + assert !(n < 0); + assert n != 0; + } + intToString(n / 10) + intToString(n % 10); + s; + } + } + } +} diff --git a/src/Parsers/README.md b/src/Parsers/README.md new file mode 100644 index 00000000..9e0b6a96 --- /dev/null +++ b/src/Parsers/README.md @@ -0,0 +1,109 @@ +# Verified Parser Combinators + +Parser combinators in Dafny, inspired from the model (Meijer&Hutton 1996). + +This library offers two styles of functional parser combinators. + +- The first parsers style is a synonym for `seq -> ParseResult` that supports monadic styles, is straightforward to use, but results in lots of closing parentheses. + +- The second parsers style is a datatype wrapper around the first style, which enable to define functions as infix or suffixes, which makes parsers sometimes easier to read and helps decreasing nesting. + +## Library usage + +The tutorial in [`Tutorial.dfy`](examples/Tutorial.dfy) shows how to import the library call the two parsers style API, apply the parser to a string, and also use the PrintFailure to pretty print the failure along with the line/col where it occurred. + +To view a full example of how to use the parser combinator library, +especially how to define a recursive parser that is guaranteed to terminate, +please refer to the files [`polynomialParser.dfy`](examples/polynomialParser.dfy) and [`polynomialParserBuilders.dfy`](examples/polynomialParserBuilder.dfy), which both parse polynomial expressions. + +As a quick walkthrough, here is a test to parse a Tic-tac-toe grid: + +``` +method {:test} TestTicTacToe() { + var x := OrSeq([ + String("O"), String("X"), String(" ") + ]); + var v := String("|"); + var row := Concat(x, ConcatR(v, Concat(x, ConcatR(v, x)))); + var sep := String("\n-+-+-\n"); + var grid := + Concat(row, ConcatR(sep, Concat(row, ConcatR(sep, row)))); + var input := "O|X| \n-+-+-\nX|O| \n-+-+-\nP| |O"; + // 012345 678901 234567 890123 45678 + var r := grid(input); + expect r.IsFailure(); + PrintFailure(input, r); +} +``` + +it displays the following: + +``` +Error: +5: P| |O + ^ +expected 'O', or +expected 'X', or +expected ' ' +``` +## What is verified? + +Despite combinators enabling to define mutually recursive parsers (`RecursiveMap`, `Recursive`), Dafny will always check termination. When using recursive combinators, termination is checked at run-time so it does not prevent quick prototyping and iterations, and error messages about non-termination are always precise (either the ordering, or the progression). + +This library offers a predicate on parsers of the first style `Valid()`, which +indicates that such parsers will never raise a fatal result, and will always return a +string that is suffix of the string they are given as input. Many combinators have +a proof that, if their inputs are Valid(), then their result is Valid(). +Checking validity statically could help design parsers that do even less checks at run-time, but it has not been developed in this library. + +This library also offers a dual type to parser, named Displayer, which is `(Result, seq) -> seq`. It only defines the dual of the Concat parser combinator and proves the roundtrip to be the identity. Because Dafny does not offer +compilable predicate to check that a datatype constructor is included in another one, +writing combinators for this kind of parser dual is difficult. + +## Relationship to JSON parsers + +The JSON parser is very specialized and the type of the parsers combinators it is using is actually a subset type. +Subset types are known to be a source of proof brittleness, +so this library design is not using subset types. +That said, it is possible to create an adapter around a JSON parser to make it a parser of this library. + +# Caveats + +- Recursive parsers will consume stack and, in programming languages that have a finite amount of stack, programs can get out of memory. Prefer `Rep` and `RepSeq` as much as possible as they are tail-recursive. + +# Implementation notes + +The module hierarchy is as follow: + +``` +abstract module Parsers { + type C +} +module StringParsers { + type C: Char +} + +abstract module ParsersBuilders { + import P: Parsers +} +module StringParsersBuilders { + import P = StringParsers +} +``` + + +## FAQ: + +### What properties can we use it to prove? + +* You get for free that parsers terminate, at worst with a run-time "fatal" parser result "no progress and got back to the same function" +* You can actually prove the absence of fatal errors. I have several lemmas that propagate this property through parsers combinators. I'm working on a lemma for the recursive case (already done in a previous formalism, need to migrate) +* You can prove the equivalence between various combinations of parser combinators (e.g. Bind/Succed == Map) +* You can use these parser combinators as a specification for optimized methods that perform the same task but perhaps with a different representation, e.g. array and position instead of sequences of characters. + +### How does it backtrack? Like Parsec (fails if tokens are consumed)? + +There are several way parsers can backtrack in the current design. +* A parser not consuming any input when returning a recoverable error can be ignored for combinators with alternatives like `Or`, `Maybe`, `If` or `ZeroOrMore` (respectively `O([...])`, `.?()`, `.If()` and `.ZeroOrMore()` if using builders) +* It's possible to transform a parser to not consume any input when it fails (except fatal errors) via the combinator `?(...)` (`.??()` if using builders). This means the failure will have the same input as previously given, making it possible for surrounding combinators to explore alternatives. +* The combinators `BindResult`, a generalization of the `Bind` combinator when considering parsers as monads, lets the user decides whether to continue on the left parser's remaining input or start from the beginning. \ No newline at end of file diff --git a/src/Parsers/StringParsers.dfy b/src/Parsers/StringParsers.dfy new file mode 100644 index 00000000..a32b4edb --- /dev/null +++ b/src/Parsers/StringParsers.dfy @@ -0,0 +1,157 @@ +include "Parsers.dfy" + +module StringParsers refines Parsers { + export StringParsers extends Parsers + provides + Char, + Digit, + DigitNumber, + Nat, + Int, + String, + ExtractLineCol, + PrintFailure, + Wrappers, + Space, + WS + reveals C + + type C = char + + // ################################## + // String-specific parser combinators + // ################################## + + opaque function Char(expectedChar: char): (p: Parser) + // A parser that tests if the current char is the given expected char + { + CharTest((c: char) => c == expectedChar, [expectedChar]) + } + + opaque function Space(): (p: Parser) + { + CharTest(c => c in " \t\r\n", "space") + } + + opaque function WS(): (p: Parser) + { + ZeroOrMore(Space()) + } + + opaque function Digit(): (p: Parser) + // A parser that tests if the current char is a digit and returns it + { + CharTest(c => c in "0123456789", "digit") + } + + opaque function DigitNumber(): (p: Parser) + // A parser that returns the current char as a number if it is one + { + Map(Digit(), (c: char) => + var d := digitToInt(c); + var n: nat := if d >= 0 then d else 0; + n + ) + } + + opaque function Nat(): (p: Parser) + // A parser that parses a natural number + { + Bind(DigitNumber(), + (result: nat) => + Rep(DigitNumber(), + (previous: nat, c: nat) => + var r: nat := previous * 10 + c; r, + result + ) + ) + } + + opaque function Int(): (p: Parser) + // A parser that parses a integer, possibly negative + { + Bind(Maybe(Char('-')), + (minusSign: Option) => + Map(Nat(), (result: nat) => if minusSign.Some? then 0-result else result)) + } + + opaque function String(expected: string): (p: Parser) + // A parser that succeeds only if the input starts with the given string + { + (input: string) => + if |expected| <= |input| && input[0..|expected|] == expected then Success(expected, input[|expected|..]) + else Failure(Recoverable, FailureData("expected '"+expected+"'", input, Option.None)) + } + + // ######################## + // Error handling utilities + // ######################## + + function repeat_(str: string, n: nat): (r: string) + // Repeats the given string n times + ensures |r| == |str| * n + { + if n == 0 then "" + else str + repeat_(str, n-1) + } + + method ExtractLineCol(input: string, pos: nat) + returns (lineNumber: nat, lineStr: string, colNumber: nat) + // Returns the line number, the extracted line, and the column number + // corresponding to a given position in the given input + { + lineNumber := 1; + var startLinePos: nat := 0; + colNumber := 0; + var i := 0; + while i < |input| && i != pos + invariant 0 <= startLinePos <= i <= |input| + { + colNumber := colNumber + 1; + if input[i] == '\r' && i + 1 < |input| && input[i+1] == '\n' { + lineNumber := lineNumber + 1; + colNumber := 0; + i := i + 1; + startLinePos := i + 1; + } else if input[i] in "\r\n" { + lineNumber := lineNumber + 1; + colNumber := 0; + startLinePos := i + 1; + } + i := i + 1; + } + while i < |input| && input[i] !in "\r\n" + invariant startLinePos <= i <= |input| + { + i := i + 1; + } + lineStr := input[startLinePos..i]; + } + + method PrintFailure(input: string, result: ParseResult, printPos: int := -1) + // Util to print the line, the column, and all the error messages + // associated to a given parse failure + requires result.Failure? + decreases result.data + { + if printPos == -1 { + print if result.level == Fatal then "Fatal error" else "Error", ":\n"; + } + var pos: int := |input| - |result.data.remaining|; // Need the parser to be Valid() + if pos < 0 { // Could be proved false if parser is Valid() + pos := 0; + } + if printPos != pos { + var line, lineStr, col := ExtractLineCol(input, pos); + print line, ": ", lineStr, "\n"; + print repeat_(" ", col + 2 + |intToString(line)|), "^","\n"; + } + print result.data.message; + if result.data.next.Some? { + print ", or\n"; + PrintFailure(input, Failure(result.level, result.data.next.value), pos); + } else { + print "\n"; + } + } +} \ No newline at end of file diff --git a/src/Parsers/StringParsersBuilders.dfy b/src/Parsers/StringParsersBuilders.dfy new file mode 100644 index 00000000..df02147b --- /dev/null +++ b/src/Parsers/StringParsersBuilders.dfy @@ -0,0 +1,43 @@ +include "StringParsers.dfy" +include "ParsersBuilders.dfy" + +module StringParsersBuilders refines ParserBuilders { + import P = StringParsers + export StringParsersBuilders extends ParserBuilders + provides S, Int, WS, Except, ParseTest, Digit, DigitNumber, ParseTestCallback + + function S(s: string): B { + B(P.String(s)) + } + function Int(): B { + B(P.Int()) + } + function Digit(): B { + B(P.Digit()) + } + function DigitNumber(): B { + B(P.DigitNumber()) + } + function WS(): B { + B(P.WS()) + } + function Except(s: string): B { + B(P.ZeroOrMore(P.CharTest((c: char) => c !in s, s))) + } + method ParseTestCallback(p: B, input: string, printer: T -> string) { + var result := p.apply(input); + if result.Failure? { + P.PrintFailure(input, result); + } else { + print printer(result.result), "\n"; + } + } + method ParseTest(p: B, input: string) { + var result := p.apply(input); + if result.Failure? { + P.PrintFailure(input, result); + } else { + print result.result, "\n"; + } + } +} \ No newline at end of file diff --git a/src/Parsers/StringParsersTests.dfy b/src/Parsers/StringParsersTests.dfy new file mode 100644 index 00000000..6d7ef536 --- /dev/null +++ b/src/Parsers/StringParsersTests.dfy @@ -0,0 +1,32 @@ +include "StringParsers.dfy" + +abstract module StringParserTests refines StringParsers { + datatype ParserResultArray = + ASuccess(result: string, pos: nat) + | AFailure(pos: nat) + + method ParseString(expected: string, input: array, i: nat) + returns (result: ParserResultArray) + requires i <= input.Length + ensures result.ASuccess? <==> String(expected)(input[..][i..]).Success? + ensures if result.ASuccess? then + result.result == String(expected)(input[..][i..]).result + else + result.pos == i + |input[..]|-|String(expected)(input[..][i..]).Remaining()| + { + reveal String(); + if i + |expected| <= input.Length && input[i..i+|expected|] == expected { + result := ASuccess(expected, i+|expected|); + } else { + result := AFailure(i); + assert String(expected)(input[..][i..]).Remaining() == input[..]; + } + } + method OptimizedSplit(input: string) returns (result: seq) + ensures Success(result, "") == + ConcatL(RepSep(ZeroOrMore(CharTest((c: char) => c != ',', "noncomma")), String(",")), + EndOfString())(input) + { +// var input + } +} diff --git a/src/Parsers/examples/AdventOfCode1.dfy b/src/Parsers/examples/AdventOfCode1.dfy new file mode 100644 index 00000000..89aeb172 --- /dev/null +++ b/src/Parsers/examples/AdventOfCode1.dfy @@ -0,0 +1,32 @@ +// RUN: %test "%s" + +include "../stringParsersBuilders.dfy" + +module AdventOfCode1 { + import opened StringParsersBuilders + + const nonDigit := + Except("0123456789\r\n").ZeroOrMore() + + const digit := + B(P.DigitNumber()) + + const parseLine := + nonDigit.e_I(digit).Bind((first: nat) => + nonDigit.e_I(digit).??().Rep((first, first), + (pair: (nat, nat), newDigit: nat) => (pair.0, newDigit) + )).I_e(nonDigit) + + const parseInput := + parseLine.I_e(S("\r").?().e_I(S("\n").?())) + .Rep(0, (acc: int, newElem: (nat, nat)) => + acc + newElem.0 * 10 + newElem.1) + + method {:test} TestParser() { + var input := @"1abc2 +pqr3stu8vwx +a1b2c3d4e5f +treb7uchet"; + ParseTest(parseInput, input); + } +} \ No newline at end of file diff --git a/src/Parsers/examples/DafnyParser.dfy b/src/Parsers/examples/DafnyParser.dfy new file mode 100644 index 00000000..82e63360 --- /dev/null +++ b/src/Parsers/examples/DafnyParser.dfy @@ -0,0 +1,109 @@ +// RUN: %test "%s" + +include "../stringParsersBuilders.dfy" + +// A parser that can self-parse +module DafnyParser { + import opened StringParsersBuilders + + type Option = StringParsersBuilders.P.Option + + datatype Program = + Program(includes: seq, declarations: seq) + + datatype Declaration = + | Module(moduleName: Type, declarations: seq) + | Import(opend: bool, imported: Type) + | Datatype(datatypeName: Type, constructors: seq) + | Const(name: string, tpe: Option, constDef: Expr) + | TypeSynonymDecl(typeName: Type, typeArgs: seq, typeDef: Type) + + datatype Constructor = + Constructor(name: string, formals: seq) + + datatype Formal = + Formal(name: Option, tpe: Type) + + datatype Type = + | TypeName(name: string) + | ApplyType(underlying: Type, args: seq) + | SelectType(prefix: Type, field: Type) + { + function applyPrefix(name: string): Type { + match this { + case ApplyType(underlying, args) => ApplyType(underlying.applyPrefix(name), args) + case SelectType(enclosing, field) => SelectType(enclosing.applyPrefix(name), field) + case _ => SelectType(TypeName(name), this) + } + } + } + + datatype Expr = + | TODO + + const stringLit := + S("\"").e_I(Except("\"")).I_e(S("\"")) + + /*const parserImport := S("import").e_I(WS()).e_I( + S("opened").e_I(WS()).Maybe()).I_I(stringLit).M( + (s: (Option, string)) => Import(s.0.Some?, s.1));*/ + const parseInclude := WS().e_I(S("include")).??().e_I(WS()).e_I(stringLit) + + const parseIdentifier := CharTest((c: char) => c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_?$", "Identifier character") + .OneOrMore() + + const parseType: B := + Rec((rec: B) => + parseIdentifier.Bind((id: string) => + var init := TypeName(id); + O([WS().e_I(S("<")).??().e_I( + rec.Bind((t: Type) => + WS().e_I(S(",")).??().e_I(rec).ZeroOrMore() + ).I_e(WS().I_e(S(">"))).M((types: seq) => + ApplyType(TypeName(id), types) + )), + WS().e_I(S(".")).??().e_I(rec).M( + (tpe: Type) => + tpe.applyPrefix(id) + ), + Ok(init) + ]) + )) + + const parseConstructor: B := Fail("parseConstructor not implemented yet") + + const parseDeclaration: B := + Rec((declParser: B) => + O([ + WS().e_I(S("module")).??().e_I(WS()).e_I(parseType).I_e(WS()).I_e(S("{")). + I_I(declParser.ZeroOrMore()).I_e(WS()).I_e(S("}")).M((r: (Type, seq)) => + Module(r.0, r.1)), + WS().e_I(S("import")).??().e_I(WS()).e_I(S("opened").e_I(WS()).?()).I_I(parseType).M( + (s: (Option, Type)) => Import(s.0.Some?, s.1)), + WS().e_I(S("datatype")).??().e_I(WS()).e_I(parseType).I_e(WS().e_I(S("="))).I_I( + parseConstructor.OneOrMore()).M((r: (Type, seq)) => + Datatype(r.0, r.1) + ) + ])) + + const parseProgram := + parseInclude.ZeroOrMore().I_I(parseDeclaration.ZeroOrMore()).M( + (idecls: (seq, seq)) => + Program(idecls.0, idecls.1) + ) + + method {:test} TestParser() { + var program := @" +include ""file"" + +import opened test + +module Test { + module Inner { + + } +} +"; + ParseTest(parseProgram, program); + } +} \ No newline at end of file diff --git a/src/Parsers/examples/JSONParser.dfy b/src/Parsers/examples/JSONParser.dfy new file mode 100644 index 00000000..c7220239 --- /dev/null +++ b/src/Parsers/examples/JSONParser.dfy @@ -0,0 +1,81 @@ +// RUN: %test "%s" + +include "../stringParsersBuilders.dfy" + +// A parser that can parse a JSON-like structure +// Strings however are parsed without unicode escape. +module JSONParser { + import opened StringParsersBuilders + datatype Decimal = + Decimal(n: int, e10: int) // (n) * 10^(e10) + + function ToDecimal(n: int): Decimal { + Decimal(n, 0) + } + + function ToDecimalFrac(n: int, digits: seq, e10: int := 0): Decimal + decreases |digits| + { + if digits == [] then Decimal(n, e10) + else ToDecimalFrac(n * 10 + digits[0], digits[1..], e10 - 1) + } + + datatype JSON = + | Null + | Bool(b: bool) + | String(str: string) + | Number(num: Decimal) + | Object(obj: seq<(string, JSON)>) // Not a map to preserve order + | Array(arr: seq) + + const nullParser: B := WS().e_I(S("null")).??().M((s) => Null) + const boolParser: B := WS().e_I(O([S("true"), S("false")])).??().M((s: string) => + Bool(s == "true")) + const stringCharParser: B := + O([ + S("\\\"").??().M((s: string) => '"'), + S("\\\\").??().M((s: string) => '\\'), + CharTest((c: char) => c != '\\' && c != '"', "no escape no quote" + ).??() + ]).ZeroOrMore() + + const stringParser: B := WS().e_I(S("\"")).??().e_I(stringCharParser).I_e(S("\"")) + + const stringJSONParser: B := stringParser.M((s: string) => String(s)) + + const numberJSONParser: B := + Int().I_I(S(".").e_I(DigitNumber().ZeroOrMore()).?()).M( + (s: (int, P.Option>)) => + if s.1.None? then Number(ToDecimal(s.0)) + else Number(ToDecimalFrac(s.0, s.1.value, 0))) + + const arrayParser: B -> B := (rec: B) => + WS().e_I(S("[")).??().e_I( + WS().e_I(rec).RepSep(WS().e_I(S(","))).M((s: seq) => Array(s)) + .I_e(WS()).I_e(S("]"))) + + const objectParser: B -> B := (rec: B) => + WS().e_I(S("{")).??().e_I( + WS().e_I(stringParser).I_I(WS().e_I(S(":").e_I(WS()).e_I(rec))) + .RepSep(WS().e_I(S(","))).M((s: seq<(string, JSON)>) => Object(s)) + .I_e(WS()).I_e(S("}"))) + + const parseProgram: B := + Rec((rec: B) => + O([ + nullParser, + boolParser, + stringJSONParser, + numberJSONParser, + arrayParser(rec), + objectParser(rec) + ])).I_e(End()) + + method {:test} TestParser() { + var source := @"{""a"": null, ""b"": [1.42, 25.150]}"; + ParseTest(parseProgram, source); + ParseTest(parseProgram, "[ ]"); + source := @"[true, false, null]"; + ParseTest(parseProgram, source); + } +} \ No newline at end of file diff --git a/src/Parsers/examples/ParserGenerator.dfy b/src/Parsers/examples/ParserGenerator.dfy new file mode 100644 index 00000000..7c1c5411 --- /dev/null +++ b/src/Parsers/examples/ParserGenerator.dfy @@ -0,0 +1,166 @@ +// RUN: %test "%s" + +include "../stringParsersBuilders.dfy" + +// A small regex-like language that can be turned into a straightforward parser +// So first we parse the parser to ParserSpec, we convert it to a parser +// and we parse the string using this parser. +// TODO: Compile this parser and prove it does the same. +module ParserGenerator { + import opened StringParsersBuilders + + type Option = StringParsersBuilders.P.Option + + function ToBool(): T -> bool { + t => true + } + + datatype ParserSpec = + | Const(s: string) + | And(left: ParserSpec, right: ParserSpec) + | Or(left: ParserSpec, right: ParserSpec) + | Repeat(p: ParserSpec) + { + predicate OnlyAndRepeat() { + match this + case Const(s) => true + case And(left, right) => left.OnlyAndRepeat() && right.OnlyAndRepeat() + case Or(left, right) => false + case Repeat(p) => p.OnlyAndRepeat() + } + function ToParser(): B { + match this + case Const(s) => S(s).M(ToBool()) + case And(left, right) => left.ToParser().e_I(right.ToParser()).M(ToBool()) + case Or(left, right) => O([left.ToParser().??(), right.ToParser()]).M(ToBool()) + case Repeat(x) => x.ToParser().??().ZeroOrMore().M(ToBool()) + } + function ToString(): string { + match this + case Const(s) => s + case And(left, right) => left.ToString() + right.ToString() + case Or(left, right) => "(" + left.ToString() + "|" + right.ToString() + ")" + case Repeat(underlying) => + var u := underlying.ToString(); + if |u| == 0 then "" else + if u[0..1] == "(" then u + "*" + else "(" + u + ")*" + } + } + + const parseSpec: B := + RecMap(map[ + "atom" := RecMapDef(0, (c: RecMapSel) => + O([ + S("(").e_I(c("or")).I_e(S(")")).Bind((atom: ParserSpec) => + S("*").?().M((star: Option) => + if star.None? then atom else Repeat(atom)) + ), + Except("()|").M((r: string) => ParserSpec.Const(r)) + ])), + "and" := RecMapDef(1, (c: RecMapSel) => + c("atom").RepMerge((atom1: ParserSpec, atom2: ParserSpec) => And(atom1, atom2))), + "or" := RecMapDef(2, (c: RecMapSel) => + c("and").RepSepMerge(S("|"), (and1: ParserSpec, and2: ParserSpec) => Or(and1, and2))) + ], "or") + + method {:test} TestParser() { + var program := "abc((de|f((g))*))ml"; + ParseTestCallback(parseSpec, program, (result: ParserSpec) => result.ToString()); + var parser := parseSpec.apply(program); + expect parser.Success?; + var underlying := parser.result.ToParser(); + program := "abcdeml"; + print underlying.apply(program); // Should print true + } + + // TODO: Some kind of compilation? +/* + datatype ParserStmt = + Expect(c: char) + | Stmts(first: ParserStmt, next: PArserStmt) + | Repeat(underlying: ParserStmt) + | Break() + { + function ToProgram(indent: string := ""): string { + match this { + case Expect(c) => "if input[i] == '" + [c] + "' { }" + } + } + // (ok, cancelling) + function Run(input: string, index: nat): (bool, nat) { + if |input| <= index then (false, index) else + match this { + case Expect(c) => (input[index] == c, index + 1) + case Stmts(first) => + if s == [] then (true, index) + else + var (r, newIndex) := !s[0].Run(input, index); + if r then Stmts(s[1..]).Run(input, newIndex) + else (false, index) // We completely forget about the failure + case Break() => + case Repeat(stmts) => + stmts + } + } + + method RunImperative(input: string) returns (b: bool) + ensures b == Run(input, 0) + { + var i := + } + } + //datatype +*/ + // A ParserSpec can be compiled to this non-deterministic Automata + // We will prove that the two parsing strategies are equivalent + /*datatype Automata = Automata( + nStates: nat, + startState: nat, + transitions: set<(nat, char, nat)>, + finalState: set + ) + { + static function FromParserSpec(spec: ParserSpec): Automata { + match spec { + case Const("") => + Automata(1, 0, {}, {0}) + case Const(s) => + var a := FromParserSpec(Const(s[1..])); + var newStart := a.nStates; + Automata(a.nStates + 1, newStart, a.transitions + {(newStart, s[0], a.startState)}, a.finalState) + case Or(left, right) => + var l := FromParserSpec(left); + var r := FromParserSpec(right); + var offsetRight := (n: nat) => n + l.nStates; + var newStart := l.nStates + r.nStates + 1; + var rightTransitions := set rt <- r.transitions :: (offsetRight(rt.0), rt.1, offsetRight(rt.2)); + Automata(l.nStates + r.nStates + 1, + newStart, + l.transitions + rightTransitions + + + set firstLeftTransition <- l.transitions | + firstLeftTransition.0 == l.start + ) + case _ => Automata(0, 0, {}, {}) + } + } + + function Run(input: string, states: set, index: nat): set { + if index >= |input| then {} + else set newState: nat, s: nat | + 0 <= newState < nStates && 0 <= s < nStates && + (s, input[index], newState) in transitions + :: newState + } + predicate Accepts(input: string) { + Run(input, {startState}, 0) * finalState != {} + } + + lemma Equivalence(spec: ParserSpec, input: string) + ensures spec.ToParser().apply(input).Success? + <==> FromParserSpec(spec).Accepts(input) + { + + } + }*/ +} \ No newline at end of file diff --git a/src/Parsers/examples/PolynomialParser.dfy b/src/Parsers/examples/PolynomialParser.dfy new file mode 100644 index 00000000..73cf8882 --- /dev/null +++ b/src/Parsers/examples/PolynomialParser.dfy @@ -0,0 +1,119 @@ +include "../StringParsers.dfy" + +module PolynomialParser { + import opened P = StringParsers + + // Parser combinators style + const parser: Parser + := ConcatL(RecursiveMap(map[ + "atom" := RecursiveDef(0, (callback: ParserSelector) => + Or(ConcatR( + String("("), ConcatL( + callback("term"), + String(")"))), + Or( + Map(Int(), (result: int) => Number(result)), ConcatR( + String("x"), + Map(Maybe(ConcatR( + String("^"), Int())), + (result: Option) => + if result.Some? then Unknown(result.value) else Unknown(1) + ))))), + "factor" := RecursiveDef(1, (callback: ParserSelector) => + Bind(callback("atom"), (atom: Expr) => + Rep( + Concat(Or(String("*"), Or(String("/"), String("%"))), + callback("atom")), + Expr.InfixBuilder(), atom) + )), + + "term" := RecursiveDef(2, (callback: ParserSelector) => + Bind(callback("factor"), (factor: Expr) => + Rep( + Concat(Or(String("+"), String("-")), + callback("factor")), + Expr.InfixBuilder(), factor) + )) + ], "term"), EndOfString()) + + type Result = Wrappers.Result + + datatype Expr = + | Binary(op: string, left: Expr, right: Expr) + | Number(value: int) + | Unknown(power: int) + { + + function Simplify(): Result { + match this { + case Number(x: int) => Result.Success(this) + case Binary(op, left, right) => + var l :- left.Simplify(); + var r :- right.Simplify(); + if l.Number? && r.Number? then + match op { + case "+" => Result.Success(Number(l.value + r.value)) + case "-" => Result.Success(Number(l.value - r.value)) + case "*" => Result.Success(Number(l.value * r.value)) + case "/" => + if r.value == 0 then + Result.Failure("Division by zero (" + right.ToString() + + " evaluates to zero)") + else + Result.Success(Number(l.value / r.value)) + case "%" => + if r.value == 0 then + Result.Failure("Modulo by zero (" + right.ToString() + + " evaluates to zero)") + else + Result.Success(Number(l.value % r.value)) + case _ => Result.Failure("Unsupported operator: " + op) + } + else + Result.Success(Binary(op, l, r)) + case Unknown(0) => Result.Success(Number(1)) + case Unknown(_) => Result.Success(this) + } + } + + static function InfixBuilder(): (Expr, (string, Expr)) -> Expr + { + (left: Expr, right: (string, Expr)) => Binary(right.0, left, right.1) + } + + function ToString(): string { + match this + case Number(x) => P.intToString(x) + case Binary(op, left, right) => + "(" + + left.ToString() + op + right.ToString() + + ")" + case Unknown(power) => + if power == 1 then "x" else if power == 0 then "1" else + "x^" + P.intToString(power) + } + } + + method Main(args: seq) { + if |args| <= 1 { + print "Please provide a polynomial to parse as argument\n"; + return; + } + for i := 1 to |args| { + var input := args[i]; + match parser(input) { + case Success(result, remaining) => + print "Polynomial:", result.ToString(), "\n"; + match result.Simplify() { + case Success(x) => + print "Simplified:", x.ToString(), "\n"; + case Failure(message) => + print message; + } + case failure => + PrintFailure(input, failure); + } + print "\n"; + } + } +} \ No newline at end of file diff --git a/src/Parsers/examples/PolynomialParserBuilder.dfy b/src/Parsers/examples/PolynomialParserBuilder.dfy new file mode 100644 index 00000000..3e509e13 --- /dev/null +++ b/src/Parsers/examples/PolynomialParserBuilder.dfy @@ -0,0 +1,112 @@ +include "../StringParsersBuilders.dfy" + + +module PolynomialParsersBuilder { + import opened StringParsersBuilders + + import P = StringParsersBuilders.P + + // PArsers builder style + const parser: B + := + RecMap(map[ + "atom" := RecMapDef(0, (c: RecMapSel) => + O([ + S("(").e_I(c("term")).I_e(S(")")), + Int().M((result: int) => Number(result)), + S("x").e_I(S("^").e_I(Int()).?().M( + (result: P.Option) => + if result.Some? then Unknown(result.value) else Unknown(1))) + ])), + + "factor" := RecMapDef(1, (c: RecMapSel) => + c("atom").Bind((atom: Expr) => // TODO: Finish this one + O([S("*"), S("/"), S("%")]) + .I_I(c("atom")).Rep(atom, Expr.InfixBuilder()))), + + "term" := RecMapDef(2, (c: RecMapSel) => + c("factor").Bind((atom: Expr) => + O([S("+"), S("-")]) + .I_I(c("factor")).Rep(atom, Expr.InfixBuilder()))) + ], "term") + .I_e(End()) + + type Result = StringParsersBuilders.P.Wrappers.Result + + datatype Expr = + | Binary(op: string, left: Expr, right: Expr) + | Number(value: int) + | Unknown(power: int) + { + + function Simplify(): Result { + match this { + case Number(x: int) => Result.Success(this) + case Binary(op, left, right) => + var l :- left.Simplify(); + var r :- right.Simplify(); + if l.Number? && r.Number? then + match op { + case "+" => Result.Success(Number(l.value + r.value)) + case "-" => Result.Success(Number(l.value - r.value)) + case "*" => Result.Success(Number(l.value * r.value)) + case "/" => + if r.value == 0 then + Result.Failure("Division by zero (" + right.ToString() + + " evaluates to zero)") + else + Result.Success(Number(l.value / r.value)) + case "%" => + if r.value == 0 then + Result.Failure("Modulo by zero (" + right.ToString() + + " evaluates to zero)") + else + Result.Success(Number(l.value % r.value)) + case _ => Result.Failure("Unsupported operator: " + op) + } + else + Result.Success(Binary(op, l, r)) + case Unknown(0) => Result.Success(Number(1)) + case Unknown(_) => Result.Success(this) + } + } + static function InfixBuilder(): (Expr, (string, Expr)) -> Expr + { + (left: Expr, right: (string, Expr)) => Binary(right.0, left, right.1) + } + function ToString(): string { + match this + case Number(x) => P.intToString(x) + case Binary(op, left, right) => + "(" + + left.ToString() + op + right.ToString() + + ")" + case Unknown(power) => + if power == 1 then "x" else if power == 0 then "1" else + "x^" + P.intToString(power) + } + } + + method Main(args: seq) { + if |args| <= 1 { + print "Please provide a polynomial to parse as argument\n"; + return; + } + for i := 1 to |args| { + var input := args[i]; + match parser.apply(input) { + case Success(result, remaining) => + print "Polynomial:", result.ToString(), "\n"; + match result.Simplify() { + case Success(x) => + print "Simplified:", x.ToString(), "\n"; + case Failure(message) => + print message; + } + case failure => + P.PrintFailure(input, failure); + } + print "\n"; + } + } +} \ No newline at end of file diff --git a/src/Parsers/examples/Tutorial.dfy b/src/Parsers/examples/Tutorial.dfy new file mode 100644 index 00000000..8249354c --- /dev/null +++ b/src/Parsers/examples/Tutorial.dfy @@ -0,0 +1,83 @@ +// RUN: %test "%s" + +include "../stringParsersBuilders.dfy" + +module Tutorial.Parsers { + import opened StringParsers + + method {:test} TestSplit1() { + var nonComma: Parser := + OneOrMore(CharTest((c: char) => c != ',', "non-comma")); + var p := + Bind(nonComma, (result: string) => + Rep(ConcatR(String(","), nonComma), + (acc, elem) => acc + [elem], + [result] + )); + + expect p("abc,d,efg") == ParseResult.Success(["abc","d","efg"], ""); + expect p("abc,d,,") == + ParseResult.Failure(Recoverable, FailureData("expected a non-comma", ",", Option.None)); + PrintFailure("abc,d,,", p("abc,d,,")); + // Displays + // Error: + // 1: abc,d,, + // ^ + // expected a non-comma + } + + function flatten(): ((A, (A, A))) -> (A, A, A) { + (input: (A, (A, A))) => + (input.0, input.1.0, input.1.1) + } + + method {:test} TestTicTacToe() { + var x := OrSeq([ + String("O"), String("X"), String(" ") + ]); + var v := String("|"); + var row := Map(Concat(x, ConcatR(v, Concat(x, ConcatR(v, x)))), + flatten()); + var sep := String("\n-+-+-\n"); + var grid := Map( + Concat(row, ConcatR(sep, Concat(row, ConcatR(sep, row)))), + flatten<(string, string, string)>()) + ; + var input := "O|X| \n-+-+-\nX|O| \n-+-+-\nP| |O"; + // 012345 678901 234567 890123 45678 + var r := grid(input); + expect r.IsFailure(); + expect |input| - |r.data.remaining| == 24; + expect r.data.message == "expected 'O'"; + expect r.data.next.Some?; + expect r.data.next.value.message == "expected 'X'"; + expect r.data.next.value.next.Some?; + expect r.data.next.value.next.value.message == "expected ' '"; + expect r.data.next.value.next.value.next.None?; + PrintFailure(input, r); + // Displays: + // Error: + // 5: P| |O + // ^ + // expected 'O', or + // expected 'X', or + // expected ' ' + } +} + + +module Tutorial.ParsersBuilders { + import opened StringParsersBuilders + + method {:test} TestSplit1() { + var nonComma: B := + CharTest((c: char) => c != ',', "non-comma").OneOrMore(); + var p := + nonComma.Bind((result: string) => + S(",").e_I(nonComma).Rep([result], + (acc: seq, elem: string) => acc + [elem] + )); + + expect p.apply("abc,d,efg") == P.ParseResult.Success(["abc","d","efg"], ""); + } +} \ No newline at end of file diff --git a/src/Parsers/examples/Tutorial.dfy.expect b/src/Parsers/examples/Tutorial.dfy.expect new file mode 100644 index 00000000..e69de29b