diff --git a/astro.config.ts b/astro.config.ts
index 4d803f425..a5faa1826 100644
--- a/astro.config.ts
+++ b/astro.config.ts
@@ -396,6 +396,7 @@ export default defineConfig({
"adventure/minimessage/api",
"adventure/minimessage/dynamic-replacements",
"adventure/minimessage/translator",
+ "adventure/minimessage/specification",
],
},
"adventure/serializer/ansi",
diff --git a/ec.config.mjs b/ec.config.mjs
index cb21c5af1..ee20df8f2 100644
--- a/ec.config.mjs
+++ b/ec.config.mjs
@@ -1,5 +1,6 @@
import { pluginCollapsibleSections } from "@expressive-code/plugin-collapsible-sections";
import { pluginLineNumbers } from "@expressive-code/plugin-line-numbers";
+import backusNaurHighlight from "./src/utils/shiki/bnf.tmLanguage.json" with { type: "json" };
import miniMessageHighlight from "./src/utils/shiki/mm.tmLanguage.json" with { type: "json" };
/** @type {import('@astrojs/starlight/expressive-code').StarlightExpressiveCodeOptions} */
@@ -14,6 +15,6 @@ export default {
},
emitExternalStylesheet: false,
shiki: {
- langs: [miniMessageHighlight],
+ langs: [miniMessageHighlight, backusNaurHighlight],
},
};
diff --git a/src/content/docs/adventure/minimessage/specification.md b/src/content/docs/adventure/minimessage/specification.md
new file mode 100644
index 000000000..4394acca1
--- /dev/null
+++ b/src/content/docs/adventure/minimessage/specification.md
@@ -0,0 +1,289 @@
+---
+title: Language Specification
+slug: adventure/minimessage/specification
+description: A developer-facing specification of the MiniMessage format.
+tableOfContents:
+ minHeadingLevel: 2
+ maxHeadingLevel: 4
+---
+
+This document outlines the MiniMessage format in detail to aid developers who wish to implement their own MiniMessage
+parser from scratch or understand the internal processes happening during the parsing of MiniMessage formatted strings.
+
+The keywords “MUST”, “MUST NOT”, “REQUIRED”, “SHALL”, “SHALL NOT”, “SHOULD”, “SHOULD NOT”, “RECOMMENDED”,
+“MAY”, and “OPTIONAL” in this document are to be interpreted as described in
+[RFC 2119](https://www.rfc-editor.org/rfc/rfc2119.html).
+
+## The MiniMessage language
+
+The MiniMessage language is a markup format used for representing Minecraft's component-based text
+system in a human-readable and modifiable way. Broadly speaking, the language consists of two types
+of tokens: **plain text** and **tags**.
+
+Plain text is any string. This string is UTF-16 compatible. The following is an example of a valid
+plain text part of a MiniMessage-formatted string:
+
+```mm
+The MiniMessage format was made to be as simple as possible.
+Emojis are allowed 😅. So are Japanese characters, like 紙.
+```
+
+MiniMessage tags are primarily used for adding markup information to plain text parts. They can, however,
+also add entirely new content into the serialized component. How a tag is resolved makes no
+difference to the MiniMessage lexer. A tag has the following structure:
+
+```mm
+<tagname[ named arguments...][:sequenced arguments...]>
+```
+
+A tag consists of the following parts:
+
+- `< >`: All tags are surrounded by less than and more than symbols.
+- `tagname`: Every tag starts with the name. The name follows the list of characters mentioned as allowed
+ in the [misc/identifiers](#identifiers) section.
+- Tags can have arguments. There are two types of arguments: named and sequenced. Named arguments
+ are, as the name implies, named in some way. Sequenced arguments do not have a name; instead, they are a simple list
+ of string values. [Tag argument documentation can be found later in the page](#tag-arguments).
+
+### Tag syntax
+
+MiniMessage tags can surround text.
+
+```mm
+<tag>Inner text</tag> and outer text.
+```
+
+Tags can be closed by repeating the tag, with a slash in front of the name. Tags are closed implicitly
+when the end of the string is reached. Furthermore, tags can be nested:
+
+```mm
+<outer>Some text <inner>even more text</inner>, and that's really it!</outer>
+```
+
+Nested tags are closed implicitly when the outer tag is closed.
+
+```mm
+This text is unmarked <outer>marked, <inner>inner,</outer> and again no longer marked.
+```
+
+If a tag has arguments, these must not be repeated on the closing tag.
+
+```mm
+<tag:argument>Some text</tag>
+```
+
+Lastly, normal tags can be closed instantly by prepending a `/` to the more-than symbol of an opening tag.
+
+```mm
+This tag is auto-closed: <tag/>
+```
+
+## Tag arguments
+
+Arguments are placed between the tag name and the closing more-than symbol.
+
+```mm
+<tagname arguments...>
+```
+
+### Named argument types
+
+Before each named argument, a piece of [whitespace](#whitespace) must be present.
+
+There are two types of named arguments: value-based arguments and flag arguments.
+
+#### Flag argument type
+
+Flags may be preceded by a single exclamation mark `!` and must follow the rules set by [identifiers](#identifiers).
+
+```mm
+<tag flag !inverted-flag another_flag>
+```
+
+The following shows a tag with invalid flags:
+
+```mm
+<tag !!flag Flag fl@g>
+```
+
+#### Valued argument type
+
+Named arguments with a value consist of an identifier, an equal symbol `=`, and a value.
+
+The identifier follows the rules as explained in the [misc/identifiers](#identifiers) section of this page.
+The value may consist of any UTF-16 characters, but must not contain any whitespace, unless explicitly quoted.
+Please refer to [misc/quoting](#quoting) for any specifics.
+
+Here is an example of valid valued named arguments:
+
+```mm
+<tag key=value second-key="a quoted value" third_key='another quoted value'>
+```
+
+And an example of invalid valued named arguments:
+
+```mm
+<tag Key=value my.color=red and blue>
+```
+
+:::note
+
+The above tag, assuming the identifiers were valid, would actually parse both `and` and `blue` as flags.
+
+:::
+
+#### Combining flags and values
+
+These two named types can be combined in any way.
+
+```mm
+<tag flag key=value !other-flag second-key="quoted value">
+```
+
+### Sequential arguments
+
+Sequential arguments are declared at the end of the tag. Each sequential argument starts with a colon `:`.
+Unless named arguments are present, a whitespace before the first colon `:` is not necessary.
+
+Sequential arguments may contain any UTF-16 characters. Any instances of `<`, `>`, or `:` characters
+must either be escaped (see [misc/escaping](#escaping)) or the argument must be wrapped in quotes
+(see [misc/quoting](#quoting)).
+
+The following are valid MiniMessage tags with sequential arguments:
+
+```mm
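+<tag:argument>
+<tag :argument>
+<tag:first:second>
+<tag:first:second:third>
+<tag:'single quoted argument'>
+<tag:"double quoted argument">
+<tag:escaped \<angle brackets\> and a \: colon!>
+<tag:first:"second, quoted argument">
+<tag:"unescaped <angle brackets> and a : colon, but it's quoted!">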
+```
+
+### Combining argument types
+
+Named and sequential arguments can be used together. The general syntax looks as follows:
+
+```mm
+<tagname named-arguments :sequential:arguments>
+```
+
+All named arguments must be located between the tag name and the first non-value colon.
+
+A few examples for valid tags making use of both named and sequenced arguments:
+
+```mm
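+<tag flag :argument>
+<tag key=value :first:second>
+<tag !flag key="quoted value" :'quoted argument'>
+<tag key="value:with:colons" :argument>
+<tag first=1 second=2 flag :one:two:three>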
+```
+
+## Misc
+
+This section defines miscellaneous behavior of common parts.
+
+### Identifiers
+
+All identifiers must be lowercase and contain only alphanumeric characters, `_`, or `-`. All identifiers
+used as named argument names should be unique.
+
+### Quoting
+Argument values can be quoted. A value counts as quoted if the first character is a `'` or `"`. The quoted
+value ends as soon as another unescaped quote of the same kind as the opening quote is found at the
+end of an argument.
+
+Between the opening and the closing quote, any UTF-16 characters may be present. This also includes the same
+quote as used for the string. The following would be a valid tag:
+
+```mm
+<tag:"a value with a " in the middle">
+```
+
+This is because the `"` in the middle is **not the last character of the value**. Therefore, it is read
+literally, since the tag would otherwise be invalid.
+
+:::tip
+
+As long as the quote is not closed, the lexer must continue reading characters. If the end of the
+input is reached before a closing quote is found, the tag and any following characters should be
+read as plain text, as the tag is never closed. This is to aid users in finding the error in their syntax.
+
+:::
+
+### Whitespace
+
+A whitespace character may be a classical space `\s`, a tab character `\t`,
+a newline `\n`, or a carriage return `\r`.
+
+### Escaping
+
+In MiniMessage, certain symbols that would otherwise be interpreted specially by a lexer may be preceded by a backslash `\`
+to instead be included literally. This includes backslash `\` characters, if they would have any effect on the next
+symbol. If a backslash character has no effect, it is included literally.
+
+## Formal grammar
+
+This segment declares the formal grammar (in a flavor of the Backus-Naur form) which specifies the MiniMessage language.
+
+In the specific flavor used here, non-terminal symbols are not enclosed in angle brackets `<>`
+and the `::=` meta symbol is replaced by `→`. Curly brackets `{}` declare optional parts. Lastly, a `+` suffix
+declares that a symbol must appear at least once, but may appear more often, whilst a `*` suffix declares that
+a symbol may appear zero or more times.
+
+```bnf
+; Important notes regarding this specific grammar: due to the massive number of characters included
+; in the UTF-16 character set, some special non-terminal symbols have been added:
+;
+; utf-16-char → includes all UTF-16 characters.
+;
+; utf-16-char-no-whitespace → includes all UTF-16 characters except for spaces (\s), tabs (\t), newlines (\n)
+; and carriage returns (\r).
+;
+; utf-16-char-no-angle-or-colon → includes all UTF-16 characters except for the
+; angle-bracket characters (<>) and colon (:). However,
+; those characters are valid if an odd number of backslash
+; characters is located in front of them.
+
+minimessage → string {tag string}
+
+string → utf-16-char*
+
+tag → "<" tag-name tag-arguments "/>"
+tag → "<" tag-name tag-arguments ">" minimessage {"</" tag-name ">"}
+
+tag-name → identifier
+
+tag-arguments → "" | named-argument " "+ sequential-argument | named-argument | " "* sequential-argument
+
+named-argument → "" | " "+ {"!"} identifier {named-argument} | " "+ identifier "=" named-value {named-argument}
+
+named-value → "" | quoted | no-whitespace-string
+
+no-whitespace-string → utf-16-char-no-whitespace*
+
+sequential-argument → ":" sequential-value {sequential-argument}
+
+sequential-value → "" | quoted | sequential-string
+
+sequential-string → utf-16-char-no-angle-or-colon*
+
+quoted → "'" string "'" | """ string """
+
+identifier → alphanumeric+
+
+alphanumeric → "a" | "b" | "c" | "d"
+ | "e" | "f" | "g" | "h"
+ | "i" | "j" | "k" | "l"
+ | "m" | "n" | "o" | "p"
+ | "q" | "r" | "s" | "t"
+ | "u" | "v" | "w" | "x"
+ | "y" | "z" | "_" | "0"
+ | "1" | "2" | "3" | "4"
+ | "5" | "6" | "7" | "8"
+ | "9" | "-"
+```
diff --git a/src/utils/shiki/bnf.tmLanguage.json b/src/utils/shiki/bnf.tmLanguage.json
new file mode 100644
index 000000000..065e3d9a3
--- /dev/null
+++ b/src/utils/shiki/bnf.tmLanguage.json
@@ -0,0 +1,82 @@
+{
+ "$schema": "https://raw.githubusercontent.com/martinring/tmlanguage/master/tmlanguage.json",
+ "name": "bnf",
+ "scopeName": "source.bnf",
+ "patterns": [{ "include": "#comment" }, { "include": "#rule" }, { "include": "#meta" }, { "include": "#strings" }],
+ "repository": {
+ "comment": {
+ "name": "comment.line.semicolon.bnf",
+ "match": ";.*$"
+ },
+
+ "rule": {
+ "name": "meta.rule.bnf",
+ "begin": "^(\\s*)([A-Za-z0-9_-]+)(\\s*→)",
+ "beginCaptures": {
+ "2": { "name": "entity.name.function.nonterminal.bnf" },
+ "3": { "name": "keyword.reserved.arrow.bnf" }
+ },
+ "end": "(?=^\\s*[A-Za-z0-9_-]+\\s*→|\\Z)",
+ "patterns": [
+ { "include": "#tripleQuotedNonterminal" },
+ { "include": "#strings" },
+ { "include": "#meta" },
+ {
+ "match": "\\b[A-Za-z0-9_-]+\\b",
+ "name": "variable.language.nonterminal.bnf"
+ }
+ ]
+ },
+
+ "tripleQuotedNonterminal": {
+ "name": "meta.triplequoted.nonterminal.bnf",
+ "begin": "\"{3}",
+ "beginCaptures": {
+ "0": { "name": "string.quoted.double.bnf" }
+ },
+ "end": "\"{3}",
+ "endCaptures": {
+ "0": { "name": "string.quoted.double.bnf" }
+ },
+ "patterns": [
+ {
+ "match": "\\b[A-Za-z0-9_-]+\\b",
+ "name": "variable.language.nonterminal.bnf"
+ }
+ ]
+ },
+
+ "strings": {
+ "patterns": [
+ {
+ "name": "string.quoted.double.bnf",
+ "begin": "\"",
+ "end": "\"",
+ "patterns": [{ "match": "\"\"", "name": "constant.character.escape.doublequote.bnf" }]
+ },
+ {
+ "name": "string.quoted.single.bnf",
+ "begin": "'",
+ "end": "'"
+ }
+ ]
+ },
+
+ "meta": {
+ "patterns": [
+ {
+ "match": "→",
+ "name": "keyword.reserved.arrow.bnf"
+ },
+ {
+ "match": "\\|",
+ "name": "keyword.reserved.choice.bnf"
+ },
+ {
+ "match": "[{}()]",
+ "name": "punctuation.section.group.bnf"
+ }
+ ]
+ }
+ }
+}
diff --git a/src/utils/shiki/mm.tmLanguage.json b/src/utils/shiki/mm.tmLanguage.json
index 5788d0d86..e7683527e 100644
--- a/src/utils/shiki/mm.tmLanguage.json
+++ b/src/utils/shiki/mm.tmLanguage.json
@@ -4,39 +4,79 @@
"patterns": [
{
"name": "meta.tag.mm",
- "begin": "(<)(/?|!?)([a-zA-Z0-9_#]+)",
+ "begin": "(</?)([a-z_0-9-]+)",
"beginCaptures": {
- "1": { "name": "constant.language.tag.mm" },
- "2": { "name": "constant.language.tag.mm" },
- "3": { "name": "constant.language.tag.mm" }
+ "1": { "name": "punctuation.definition.tag.mm" },
+ "2": { "name": "constant.language.identifier.identifier.mm" }
},
- "end": "(>)",
+ "end": "/?>",
"endCaptures": {
- "1": { "name": "constant.language.tag.mm" }
+ "1": { "name": "punctuation.definition.tag.mm" }
},
"patterns": [
{
- "name": "string.quoted.single.argument.mm",
- "match": "(:)'([^']*)'",
+ "name": "variable.parameter.sequenced.quoted.mm",
+ "match": "(\\s*)(:)(\".*\")",
"captures": {
- "1": { "name": "constant.language.tag.mm" },
- "2": { "name": "string.quoted.single.argument.mm" }
+ "2": { "name": "punctuation.definition.identifier.mm" },
+ "3": { "name": "string.quoted.double.mm" }
}
},
{
- "name": "string.quoted.double.argument.mm",
- "match": "(:)\"([^\"]*)\"",
+ "name": "variable.parameter.sequenced.single-quoted.mm",
+ "match": "(\\s*)(:)('.*')",
"captures": {
- "1": { "name": "constant.language.tag.mm" },
- "2": { "name": "string.quoted.double.argument.mm" }
+ "2": { "name": "punctuation.definition.identifier.mm" },
+ "3": { "name": "string.quoted.single.mm" }
}
},
{
- "name": "variable.language.argument.mm",
- "match": "(:)([^:\\s\"'>]+(?:\\[[^\\]]*\\])?)",
+ "name": "variable.parameter.sequenced.unquoted.mm",
+ "match": "(\\s*)(:)(((\\\\>)|(\\\\:)|[^>:])*)",
"captures": {
- "1": { "name": "constant.language.tag.mm" },
- "2": { "name": "variable.language.argument.mm" }
+ "2": { "name": "punctuation.definition.identifier.mm" },
+ "3": { "name": "string.unquoted.mm" }
+ }
+ },
+ {
+ "name": "variable.parameter.named-argument.quoted.mm",
+ "match": "(\\s+)([a-z_0-9-]+)(=)(\".*\")",
+ "captures": {
+ "2": { "name": "keyword.reserved.identifier.mm" },
+ "3": { "name": "punctuation.definition.identifier.mm" },
+ "4": { "name": "string.quoted.double.mm" }
+ }
+ },
+ {
+ "name": "variable.parameter.named-argument.single-quoted.mm",
+ "match": "(\\s+)([a-z_0-9-]+)(=)('.*')",
+ "captures": {
+ "2": { "name": "keyword.reserved.identifier.mm" },
+ "3": { "name": "punctuation.definition.identifier.mm" },
+ "4": { "name": "string.quoted.single.mm" }
+ }
+ },
+ {
+ "name": "variable.parameter.named-argument.unquoted.mm",
+ "match": "(\\s+)([a-z_0-9-]+)(=)([^ >:]*)",
+ "captures": {
+ "2": { "name": "keyword.reserved.identifier.mm" },
+ "3": { "name": "punctuation.definition.identifier.mm" },
+ "4": { "name": "string.unquoted.mm" }
+ }
+ },
+ {
+ "name": "variable.parameter.inverse-flag.mm",
+ "match": "(\\s+)(![a-z_0-9-]+)",
+ "captures": {
+ "2": { "name": "keyword.reserved.inverse-flag.mm" }
+ }
+ },
+ {
+ "name": "variable.parameter.flag.mm",
+ "match": "(\\s+)([a-z_0-9-]+)",
+ "captures": {
+ "2": { "name": "keyword.reserved.flag.mm" }
}
}
]