diff --git a/pkgs/yaml/lib/src/loader.dart b/pkgs/yaml/lib/src/loader.dart index 7cdf45a7e..99c67ba3d 100644 --- a/pkgs/yaml/lib/src/loader.dart +++ b/pkgs/yaml/lib/src/loader.dart @@ -12,6 +12,7 @@ import 'equality.dart'; import 'error_listener.dart'; import 'event.dart'; import 'parser.dart'; +import 'utils.dart'; import 'yaml_document.dart'; import 'yaml_exception.dart'; import 'yaml_node.dart'; @@ -125,9 +126,7 @@ class Loader { /// Composes a sequence node. YamlNode _loadSequence(SequenceStartEvent firstEvent) { - if (firstEvent.tag != '!' && - firstEvent.tag != null && - firstEvent.tag != 'tag:yaml.org,2002:seq') { + if (!isResolvedYamlTag(firstEvent.tag, 'seq')) { throw YamlException('Invalid tag for sequence.', firstEvent.span); } @@ -147,9 +146,7 @@ class Loader { /// Composes a mapping node. YamlNode _loadMapping(MappingStartEvent firstEvent) { - if (firstEvent.tag != '!' && - firstEvent.tag != null && - firstEvent.tag != 'tag:yaml.org,2002:map') { + if (!isResolvedYamlTag(firstEvent.tag, 'map')) { throw YamlException('Invalid tag for mapping.', firstEvent.span); } @@ -192,10 +189,22 @@ class Loader { var result = _parseNumber(scalar, allowInt: false); if (result != null) return result; throw YamlException('Invalid float scalar.', scalar.span); - case 'tag:yaml.org,2002:str': - return YamlScalar.internal(scalar.value, scalar); - default: - throw YamlException('Undefined tag: ${scalar.tag}.', scalar.span); + + /// Represent partially as a string when custom tags are present. Any + /// other yaml tag must be `!!str`. + /// + /// See: https://yaml.org/spec/1.2/spec.html#id2768011 + /// (PS: This is the YAML version this parser is based on) + case String? tag: + { + // Intentionally (quirky and) verbose. We want this condition to leak + // for non-schema tags. 
+ if (!isResolvedYamlTag(tag, 'str')) { + throw YamlException('Undefined tag: ${scalar.tag}.', scalar.span); + } + + return YamlScalar.internal(scalar.value, scalar); + } } } diff --git a/pkgs/yaml/lib/src/parser.dart b/pkgs/yaml/lib/src/parser.dart index e924e40ea..cb6c51f61 100644 --- a/pkgs/yaml/lib/src/parser.dart +++ b/pkgs/yaml/lib/src/parser.dart @@ -290,16 +290,23 @@ class Parser { } String? tag; - if (tagToken != null) { - if (tagToken!.handle == null) { - tag = tagToken!.suffix; + if (tagToken + case TagToken(:final handle, :final suffix, :final isVerbatim)) { + /// Verbatim tags cannot be resolved as global tags. + /// + /// See: https://yaml.org/spec/1.2.2/#691-node-tags + /// - Verbatim tags section + /// - All 1.2.* versions behave this way + if (handle == null || isVerbatim) { + tag = suffix; } else { - var tagDirective = _tagDirectives[tagToken!.handle]; + final tagDirective = _tagDirectives[handle]; + if (tagDirective == null) { throw YamlException('Undefined tag handle.', tagToken!.span); } - tag = tagDirective.prefix + (tagToken?.suffix ?? ''); + tag = tagDirective.prefix + suffix; } } diff --git a/pkgs/yaml/lib/src/scanner.dart b/pkgs/yaml/lib/src/scanner.dart index 1cfd3af61..2a821b6e7 100644 --- a/pkgs/yaml/lib/src/scanner.dart +++ b/pkgs/yaml/lib/src/scanner.dart @@ -918,22 +918,55 @@ class Scanner { /// /// %TAG !yaml! tag:yaml.org,2002: \n /// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + /// + /// OR + /// + /// %TAG !yaml! !dart \n + /// ^^^^^^^^^^^^^^^^^ + /// Token _scanTagDirectiveValue(LineScannerState start) { _skipBlanks(); - var handle = _scanTagHandle(directive: true); - if (!_isBlank) { + final (:tagHandle, :isNamed) = _scanTagHandle(directive: true); + + // !yaml! or ! or !!. Throw for !yaml + if (!isNamed && tagHandle != '!' 
&& tagHandle != '!!') { + throw YamlException( + 'Invalid global tag handle', + _scanner.spanFrom(start), + ); + } else if (!_isBlank) { throw YamlException('Expected whitespace.', _scanner.emptySpan); } _skipBlanks(); - var prefix = _scanTagUri(); + var prefix = ''; + + /// Both tag uri and local tags can be used as prefixes. + /// + /// See: https://yaml.org/spec/1.2.2/#6822-tag-prefixes + if (_scanner.peekChar() == EXCLAMATION) { + prefix = _scanTagHandle( + directive: true, + isGlobalTagPrefix: true, + ).tagHandle; + } else { + prefix = _scanTagUri(); + + if (prefix.isEmpty) { + throw YamlException( + 'Expected a non-empty global tag prefix', + _scanner.emptySpan, + ); + } + } + if (!_isBlankOrEnd) { throw YamlException('Expected whitespace.', _scanner.emptySpan); } - return TagDirectiveToken(_scanner.spanFrom(start), handle, prefix); + return TagDirectiveToken(_scanner.spanFrom(start), tagHandle, prefix); } /// Scans a [TokenType.anchor] token. @@ -975,72 +1008,164 @@ class Scanner { /// Scans a [TokenType.tag] token. Token _scanTag() { - String? handle; - String suffix; - var start = _scanner.state; + final start = _scanner.state; // Check if the tag is in the canonical form. - if (_scanner.peekChar(1) == LEFT_ANGLE) { - // Eat '!<'. - _scanner.readChar(); - _scanner.readChar(); + if (_scanner.peekChar(1) == LEFT_ANGLE) return _scanVerbatimTag(start); - handle = ''; - suffix = _scanTagUri(); + // The tag has either the '!suffix' or the '!handle!suffix' form. - _scanner.expect('>'); - } else { - // The tag has either the '!suffix' or the '!handle!suffix' form. + // First, try to scan a handle. + final (:tagHandle, :isNamed) = _scanTagHandle(); - // First, try to scan a handle. - handle = _scanTagHandle(); + String? 
handle = tagHandle; + var suffix = ''; - if (handle.length > 1 && handle.startsWith('!') && handle.endsWith('!')) { - suffix = _scanTagUri(flowSeparators: false); - } else { - suffix = _scanTagUri(head: handle, flowSeparators: false); + if (isNamed || tagHandle == '!!') { + suffix = _scanTagUri(flowSeparators: false); - // There was no explicit handle. - if (suffix.isEmpty) { - // This is the special '!' tag. - handle = null; - suffix = '!'; - } else { - handle = '!'; - } + /// Secondary and named tag handles cannot have an empty tag suffix. + /// + /// c-ns-shorthand-tag ::= + /// c-tag-handle + /// ns-tag-char+ + /// + /// See: https://yaml.org/spec/1.2.2/#691-node-tags + if (suffix.isEmpty) { + throw YamlException( + 'Expected a non-empty shorthand suffix', + _scanner.spanFrom(start), + ); + } + } else { + suffix = _scanTagUri(head: handle, flowSeparators: false); + + // There was no explicit handle. + if (suffix.isEmpty) { + // This is the special '!' tag. + handle = null; + suffix = '!'; + } else { + handle = '!'; // Not named. } } // libyaml insists on whitespace after a tag, but example 7.2 indicates // that it's not required: http://yaml.org/spec/1.2/spec.html#id2786720. - - return TagToken(_scanner.spanFrom(start), handle, suffix); + return TagToken( + _scanner.spanFrom(start), + handle, + suffix, + isVerbatim: false, + ); } - /// Scans a tag handle. - String _scanTagHandle({bool directive = false}) { - _scanner.expect('!'); + /// Scans a canonical [TokenType.tag] token whose [start] position is + /// provided by [_scanTag]. + TagToken _scanVerbatimTag(LineScannerState start) { + // Eat '!<'. + final buffer = StringBuffer() + ..writeCharCode(_scanner.readChar()) + ..writeCharCode(_scanner.readChar()); - var buffer = StringBuffer('!'); + var tagUri = ''; - // libyaml only allows word characters in tags, but the spec disagrees: - // http://yaml.org/spec/1.2/spec.html#ns-tag-char. 
- var start = _scanner.position; - while (_isTagChar) { - _scanner.readChar(); + if (_scanner.peekChar() == EXCLAMATION) { + tagUri = _scanTagHandle(isVerbatimTag: true).tagHandle; // ! + + if (tagUri == '!') { + throw YamlException( + 'A non-specific tag cannot be declared as a verbatim tag', + _scanner.spanFrom(start), + ); + } + } else { + tagUri = _scanTagUri(); // ! + + /// Expect ! to be ! (a global tag) + /// + /// See: https://yaml.org/spec/1.2.2/#3212-tags + if (!tagUri.startsWith('tag:') || tagUri.substring(4).isEmpty) { + throw YamlException( + 'Invalid tag uri used as a verbatim tag', + _scanner.spanFrom(start), + ); + } } - buffer.write(_scanner.substring(start)); + + _scanner.expect('>'); + buffer.write('$tagUri>'); + + return TagToken( + _scanner.spanFrom(start), + '', + buffer.toString(), + isVerbatim: true, + ); + } + + /// Scans a tag handle and explicitly indicates if the handle was a named + /// tag handle. + /// + /// If [isGlobalTagPrefix] is `true`, the handle can never be a secondary or + /// named tag handle. Such handles cannot be used in a global tag's local tag + /// prefix. + /// + /// If [isVerbatimTag] is `true`, `isNamed` will always be `false`. Verbatim + /// tags expect the next non-uri char to be `>`. + /// + /// See: https://yaml.org/spec/1.2/spec.html#id2783273 + ({bool isNamed, String tagHandle}) _scanTagHandle({ + bool directive = false, + bool isGlobalTagPrefix = false, + bool isVerbatimTag = false, + }) { + var named = false; + final start = _scanner.state; + _scanner.expect('!'); + + final buffer = StringBuffer('!'); if (_scanner.peekChar() == EXCLAMATION) { - buffer.writeCharCode(_scanner.readCodePoint()); + buffer.writeCharCode(_scanner.readChar()); + + if (isGlobalTagPrefix) { + throw YamlException( + 'A local tag used as a global tag prefix cannot have a secondary tag' + ' handle', + _scanner.spanFrom(start), + ); + } } else { - // It's either the '!' tag or not really a tag handle. 
If it's a %TAG - // directive, it's an error. If it's a tag token, it must be part of a - // URI. - if (directive && buffer.toString() != '!') _scanner.expect('!'); + // Both %TAG and tag shorthands can have named handles. + buffer.write(_scanTagUri(flowSeparators: false)); + + /// For directives, expect the "!" for a named tag. No other handle can + /// have a tag uri here. For a tag shorthand anywhere else, this needs to + /// be a separation space (tab included) or line break or nothing. + /// + /// Verbatim tags expect '>'. + if (!isVerbatimTag && buffer.length > 1 && !_isBlankOrEnd) { + _scanner.expect('!'); + + /// A tag directive doesn't allow a local tag with a named handle as a + /// local tag prefix. + /// + /// See: https://yaml.org/spec/1.2/spec.html#id2783273 + if (directive && isGlobalTagPrefix) { + throw YamlException( + 'A local tag used as a global tag prefix cannot have a named tag' + ' handle', + _scanner.spanFrom(start), + ); + } + + buffer.write('!'); + named = true; + } } - return buffer.toString(); + return (isNamed: named, tagHandle: buffer.toString()); } /// Scans a tag URI. @@ -1049,13 +1174,12 @@ class Scanner { /// [flowSeparators] indicates whether the tag URI can contain flow /// separators. String _scanTagUri({String? head, bool flowSeparators = true}) { - var length = head == null ? 0 : head.length; - var buffer = StringBuffer(); + final buffer = StringBuffer(); // Copy the head if needed. // // Note that we don't copy the leading '!' character. - if (length > 1) buffer.write(head!.substring(1)); + if ((head?.length ?? 0) > 1) buffer.write(head!.substring(1)); // The set of characters that may appear in URI is as follows: // @@ -1075,7 +1199,7 @@ class Scanner { } // libyaml manually decodes the URL, but we don't have to do that. - return Uri.decodeFull(_scanner.substring(start)); + return buffer.toString() + Uri.decodeFull(_scanner.substring(start)); } /// Scans a block scalar. 
diff --git a/pkgs/yaml/lib/src/token.dart b/pkgs/yaml/lib/src/token.dart index 7d5d6bc9a..97aa9c50d 100644 --- a/pkgs/yaml/lib/src/token.dart +++ b/pkgs/yaml/lib/src/token.dart @@ -102,7 +102,10 @@ class TagToken implements Token { /// The tag suffix. final String suffix; - TagToken(this.span, this.handle, this.suffix); + /// Whether this tag is declared in its canonical form + final bool isVerbatim; + + TagToken(this.span, this.handle, this.suffix, {required this.isVerbatim}); @override String toString() => 'TAG $handle $suffix'; diff --git a/pkgs/yaml/lib/src/utils.dart b/pkgs/yaml/lib/src/utils.dart index 0dc132ff8..99a8434e8 100644 --- a/pkgs/yaml/lib/src/utils.dart +++ b/pkgs/yaml/lib/src/utils.dart @@ -38,3 +38,15 @@ bool isHighSurrogate(int codeUnit) => codeUnit >>> 10 == 0x36; /// Whether [codeUnit] is a UTF-16 low surrogate. bool isLowSurrogate(int codeUnit) => codeUnit >>> 10 == 0x37; + +/// Whether a tag is a valid tag based on its [canonicalSuffix] as defined in +/// the yaml spec. Always returns `true` if the caller has a custom tag and can +/// be partially composed/represented (synthetic node). +/// +/// - `seq` - for sequence +/// - `map` - for map +/// - `str` - for scalar +bool isResolvedYamlTag(String? tag, String canonicalSuffix) => + tag == null || + !tag.startsWith('tag:yaml.org,2002:') || // Leaky prefix condition. + tag == 'tag:yaml.org,2002:$canonicalSuffix'; diff --git a/pkgs/yaml/test/utils.dart b/pkgs/yaml/test/utils.dart index 372440ae9..767d93bec 100644 --- a/pkgs/yaml/test/utils.dart +++ b/pkgs/yaml/test/utils.dart @@ -7,6 +7,8 @@ import 'package:test/test.dart'; import 'package:yaml/src/equality.dart' as equality; +import 'package:yaml/src/scanner.dart'; +import 'package:yaml/src/token.dart'; import 'package:yaml/yaml.dart'; /// A matcher that validates that a closure or Future throws a [YamlException]. @@ -93,3 +95,34 @@ String indentLiteral(String text) { return lines.join('\n'); } + +/// Generates tokens that can be consumed by a yaml parser.
+Iterable<Token> generateTokens(String source) sync* { + final scanner = Scanner(source); + + do { + if (scanner.peek() case Token token) { + yield token; + scanner.advance(); + continue; + } + + break; + } while (true); +} + +/// Matches a [TagDirectiveToken] emitted by a [Scanner] +Matcher isATagDirective(String handle, String prefix) => + isA<TagDirectiveToken>() + .having((t) => t.handle, 'handle', equals(handle)) + .having((t) => t.prefix, 'prefix', equals(prefix)); + +extension PadUtil on String { + /// Applies an indent of 8 spaces to a multiline string to ensure strings + /// are compatible with existing matchers. + /// + /// See [cleanUpLiteral]. + String asIndented() => split('\n') + .map((line) => line.isEmpty ? line : '${' ' * 8}$line') + .join('\n'); +} diff --git a/pkgs/yaml/test/yaml_test.dart b/pkgs/yaml/test/yaml_test.dart index 3b5b77d2f..40d28fcc3 100644 --- a/pkgs/yaml/test/yaml_test.dart +++ b/pkgs/yaml/test/yaml_test.dart @@ -9,6 +9,7 @@ import 'package:test/test.dart'; import 'package:yaml/src/error_listener.dart'; +import 'package:yaml/src/token.dart'; import 'package:yaml/yaml.dart'; import 'utils.dart'; @@ -992,21 +993,33 @@ void main() { }); group('6.8: Directives', () { - // TODO(nweiz): assert that this produces a warning test('[Example 6.13]', () { - expectYamlLoads('foo', ''' + expect( + () => expectYamlLoads( + 'foo', + ''' %FOO bar baz # Should be ignored # with a warning. - --- "foo"'''); + --- "foo"''', + ), + prints(contains('Warning: unknown directive')), + ); }); - // TODO(nweiz): assert that this produces a warning.
test('[Example 6.14]', () { - expectYamlLoads('foo', ''' + expect( + () => expectYamlLoads( + 'foo', + ''' %YAML 1.3 # Attempt parsing # with a warning --- - "foo"'''); + "foo"''', + ), + prints( + contains('Warning: this parser only supports YAML 1.1 and 1.2.'), + ), + ); }); test('[Example 6.15]', () { @@ -1030,8 +1043,95 @@ void main() { bar'''); }); - // Examples 6.18 through 6.22 test custom tag URIs, which this - // implementation currently doesn't plan to support. + // Examples 6.18 through 6.22 test custom tag URIs. Inspect the lower level + // event generator to check correctness of the tag directives we parse + // (and ignore?) + test('[Example 6.18]', () { + const source = ''' +# Global +%TAG ! tag:example.com,2000:app/ +--- +!foo "bar" +'''; + + expect( + generateTokens(source), + anyElement(isATagDirective('!', 'tag:example.com,2000:app/')), + ); + + expectYamlLoads('bar', source.asIndented()); + }); + + test('[Example 6.19]', () { + const source = ''' +%TAG !! tag:example.com,2000:app/ +--- +!!int 1 - 3 # Interval, not integer +'''; + + expect( + generateTokens(source), + anyElement(isATagDirective('!!', 'tag:example.com,2000:app/')), + ); + + expectYamlLoads('1 - 3', source.asIndented()); + }); + + test('[Example 6.20]', () { + const source = ''' +%TAG !e! tag:example.com,2000:app/ +--- +!e!foo "bar" +'''; + + expect( + generateTokens(source), + anyElement(isATagDirective('!e!', 'tag:example.com,2000:app/')), + ); + + expectYamlLoads('bar', source.asIndented()); + }); + + test('[Example 6.21]', () { + const source = ''' +%TAG !m! !my- +--- # Bulb here +!m!light fluorescent +... +%TAG !m! !my- +--- # Color here +!m!light green +'''; + + expect( + generateTokens(source).whereType(), + + // Two different documents. Same directive + everyElement(isATagDirective('!m!', '!my-')), + ); + + expectYamlStreamLoads(['fluorescent', 'green'], source.asIndented()); + }); + + test('[Example 6.22]', () { + const source = ''' +%TAG !e! 
tag:example.com,2000:app/ +--- +- !e!foo "bar" +'''; + + expect( + generateTokens(source), + anyElement(isATagDirective('!e!', 'tag:example.com,2000:app/')), + ); + + expectYamlLoads(['bar'], source.asIndented()); + }); + + test('Throws for invalid global tag handles', () { + expectYamlFails('%TAG !not-allowed !birdbox'); + expectYamlFails('%TAG uri:not-allowed !birdbox'); + }); }); group('6.9: Node Properties', () { @@ -1048,16 +1148,39 @@ void main() { &a2 baz : *a1'''); }); - // Example 6.24 tests custom tag URIs, which this implementation currently - // doesn't plan to support. + test('[Example 6.24]', () { + expectYamlLoads({'foo': 'baz'}, ''' + ! foo : + ! baz'''); + }); test('[Example 6.25]', () { expectYamlFails('- ! foo'); expectYamlFails('- !<\$:?> foo'); + expectYamlFails('- !'); // Incomplete verbatim tag uri + expectYamlFails('- !'); + }); + + test('[Example 6.26]', () { + expectYamlLoads(['foo', 'bar', 'baz'], ''' + %TAG !e! tag:example.com,2000:app/ + --- + - !local foo + - !!str bar + - !e!tag%21 baz'''); }); - // Examples 6.26 and 6.27 test custom tag URIs, which this implementation - // currently doesn't plan to support. + test('[Example 6.27]', () { + expectYamlFails(''' + %TAG !e! tag:example,2000:app/ + --- + - !e! foo'''); + + expectYamlFails(''' + %TAG !e! tag:example,2000:app/ + --- + - !h!bar foo'''); + }); test('[Example 6.28]', () { expectYamlLoads(['12', 12, '12'], ''' @@ -1073,6 +1196,33 @@ void main() { First occurrence: &anchor Value Second occurrence: *anchor'''); }); + + // Custom & verbatim tags should nudge parser to a load node as a generic + // kind. + test('Represents scalars partially as strings', () { + expectYamlLoads(['3', 'false', '3.10'], ''' + %TAG !! !no-type + %TAG !isA! !generic-kind + --- + - !!int 3 + - !isA!bool false + - ! 3.10'''); + }); + + test('Composes collections completely', () { + expectYamlLoads([ + {}, + [], + ['list'] + ], ''' + %TAG !! !no-type + %TAG !isA! 
!generic-kind + --- + - !!map {} + - !isA!list [] + - ! + - list'''); + }); }); // Chapter 7: Flow Styles