From fba345ebbdeeb7b80a7c5b3a8b57addf15f9cdfe Mon Sep 17 00:00:00 2001 From: Strokkur24 Date: Fri, 7 Nov 2025 20:42:45 +0100 Subject: [PATCH 1/5] feat: add MiniMessage language specification --- astro.config.ts | 1 + ec.config.mjs | 3 +- .../adventure/minimessage/specification.md | 289 ++++++++++++++++++ src/utils/shiki/bnf.tmLanguage.json | 82 +++++ src/utils/shiki/mm.tmLanguage.json | 80 +++-- 5 files changed, 434 insertions(+), 21 deletions(-) create mode 100644 src/content/docs/adventure/minimessage/specification.md create mode 100644 src/utils/shiki/bnf.tmLanguage.json diff --git a/astro.config.ts b/astro.config.ts index 4d803f425..a5faa1826 100644 --- a/astro.config.ts +++ b/astro.config.ts @@ -396,6 +396,7 @@ export default defineConfig({ "adventure/minimessage/api", "adventure/minimessage/dynamic-replacements", "adventure/minimessage/translator", + "adventure/minimessage/specification", ], }, "adventure/serializer/ansi", diff --git a/ec.config.mjs b/ec.config.mjs index cb21c5af1..ee20df8f2 100644 --- a/ec.config.mjs +++ b/ec.config.mjs @@ -1,5 +1,6 @@ import { pluginCollapsibleSections } from "@expressive-code/plugin-collapsible-sections"; import { pluginLineNumbers } from "@expressive-code/plugin-line-numbers"; +import backusNaurHighlight from "./src/utils/shiki/bnf.tmLanguage.json" with { type: "json" }; import miniMessageHighlight from "./src/utils/shiki/mm.tmLanguage.json" with { type: "json" }; /** @type {import('@astrojs/starlight/expressive-code').StarlightExpressiveCodeOptions} */ @@ -14,6 +15,6 @@ export default { }, emitExternalStylesheet: false, shiki: { - langs: [miniMessageHighlight], + langs: [miniMessageHighlight, backusNaurHighlight], }, }; diff --git a/src/content/docs/adventure/minimessage/specification.md b/src/content/docs/adventure/minimessage/specification.md new file mode 100644 index 000000000..c989460a4 --- /dev/null +++ b/src/content/docs/adventure/minimessage/specification.md @@ -0,0 +1,289 @@ +--- +title: Language Specification 
+slug: adventure/minimessage/specification +description: A developer-facing specification of the MiniMessage format. +tableOfContents: + minHeadingLevel: 2 + maxHeadingLevel: 5 +--- + +This document outlines the MiniMessage format in detail to aid developers who wish to implement their own MiniMessage +parser from scratch or understand the internal processes happening during the parsing of MiniMesssage formatted strings. + +The keywords “MUST”, “MUST NOT”, “REQUIRED”, “SHALL”, “SHALL NOT”, “SHOULD”, “SHOULD NOT”, “RECOMMENDED”, +“MAY”, and “OPTIONAL” in this document are to be interpreted as described in +[RFC 2119](https://www.rfc-editor.org/rfc/rfc2119.html). + +## The MiniMessage language + +The MiniMessage language is a markup format used for representing Minecraft's component-based text +system in a human-readable and modifiable way. Broadly speaking, the language consists of two types +of tokens: **plain text** and **tags**. + +Plain text is any string. This string is UTF-16 compatible. The following is an example of a valid +plain text part of a MiniMessage-formatted string: + +```mm +The MiniMessage format was made to be as simple as possible. +Emojis are allowed 😅. So are Japanese characters, like 紙. +``` + +MiniMessage tags are primarily used for adding markup information to plain text parts. They can, however, +also add entirely new content into the serialized component. How a tag is resolved makes no +difference to the MiniMessage lexer. A tag has the following structure: + +```mm + +``` + +A tag consists of the following parts: + +- `< >`: All tags are surrounded by less than and more than symbols. +- `tagname`: Every tag starts with the name. The name follows the list of characters mentioned as allowed + in the [misc/identifiers](#identifiers) section. +- Tags can have arguments. There are two kinds of arguments: named and sequenced. Named arguments + are, as the name implies, named in some way. 
Sequenced arguments do not have a name; instead, they are a simple list + of string values. [Tag argument documentation can be found later in the page](#tag-arguments). + +### Tag syntax + +MiniMessage tags can surround text. + +```mm +Inner text and outer text. +``` + +Tags can be closed by repeating the tag, with a slash in front of the name. Tags are closed implicitly +when the end of the string is reached. Furthermore, tags can be nested: + +```mm +Some text even more text, and that's really it! +``` + +Nested tags are closed implicitly when the outer tag is closed. + +```mm +This text is unmarked marked, inner, and again no longer marked. +``` + +If a tag has arguments, these must not be repeated on the closing tag. + +```mm +Some text +``` + +Lastly, normal tags can be closed instantly by prepending a `/` to the more-than symbol of an opening tag. + +```mm +This tag is auto-closed: +``` + +## Tag arguments + +Arguments are placed between the tag name and the closing more-than symbol. + +```mm + +``` + +### Named argument types + +Before each named argument, a piece of [whitespace](#whitespace) must be present. + +There exist two types of named arguments: value-based arguments and flag arguments. + +#### Flag argument type + +Flags may be preceded by a single exclamation mark `!` and must follow the rules set by [identifiers](#identifiers). + +```mm + +``` + +The following shows a tag with invalid flags: + +```mm + +``` + +#### Valued argument type + +Named arguments with a value consist of an identifier, an equals sign `=`, and a value. + +The identifier follows the rules as explained in the [misc/identifiers](#identifiers) section of this page. +The value may consist of any UTF-16 characters, but must not contain any whitespace, unless explicitly quoted. +Please refer to [misc/quoting](#quoting) for any specifics. 
+ +Here is an example for valid valued named arguments: + +```mm + +``` + +And example for invalid valued named arguments: + +```mm + +``` + +:::note + +The above tag, assuming the identifiers were valid, would actually parse both `and` and `blue` as flags. + +::: + +#### Combining flags and values + +These two named types can be combined in any way. + +```mm + +``` + +### Sequential arguments + +Sequential arguments are declared at the end of the tag. Each sequential argument starts with a colon `:`. +Unless named arguments are present, a whitespace before the first colon `:` is not necessary. + +Sequential arguments may contain any UTF-16 characters. Any instances of `<`, `>`, or `:` characters +must either be escaped (see [misc/escaping](#escaping)) or the argument must be wrapped in quotes +(see [misc/quoting](#quoting)). + +The following are valid MiniMessage tags with sequential arguments: + +```mm + + + + + + + and a \: colon!> + + and a : colon, but it's quoted!"> +``` + +### Combining argument types + +Named and sequential arguments can be used together. The general syntax looks as follows: + +```mm + +``` + +All named arguments must be located between the tag name and the first non-value colon. + +A few examples for valid tags making use of both named and sequenced arguments: + +```mm + + + + + +``` + +## Misc + +This section defines miscellaneous behavior of common parts. + +### Identifiers + +All identifiers must be lowercased and contain only alphanumerical characters or `_`. All identifiers +used as named argument names should be unique. + +### Quoting +Argument values can be quoted. A value counts as quoted if the first character is a `'` or `"`. The quoted +value ends as soon as another unquoted quote of the same character as the starting quote is found at the +end of an argument. + +Between the opening and the closing quote, any UTF-16 characters may be present. This also includes the same +quote as used for the string. 
The following would be a valid tag: + +```mm + +``` + +This is because the `"` in the middle is **not the last character of the value**. Therefore, it is read +literally, since the tag would otherwise be invalid. + +:::tip + +As long as the quote is not closed, the lexer must continue reading characters. If the end of the +input is reached before a closing quote is found, the tag and any following characters should be +read as plain text, as the tag is never closed. This is to aid users in finding the error in their syntax. + +::: + +### Whitespace + +A whitespace character may be a classical space `\s`, a tab character `\t`, +a newline `\n`, or a carriage return `\r`. + +### Escaping + +In MiniMessage, certain symbols, which would be interpreted differently by a lexer, may be preceded by a backslash `\` +to instead be included literally. This includes backslash `\` characters, if they would have any effect on the next +symbol. If a backslash character has no effect, it is included literally. + +## Formal grammar + +This segment declares the formal grammar (in a flavor of the Backus-Naur form) which specifies the MiniMessage language. + +The specific flavor used here differs in that non-terminal symbols are not enclosed in angle brackets `<>` +and the `::=` meta symbol is replaced by `→`. Curly brackets `{}` declare optional parts. Lastly, a `+` suffix +declares that a symbol should appear at least once, but may appear more often, whilst a `*` suffix declares that +a symbol may appear zero or more times. + +```bnf +; Important notes regarding this specific grammar: due to the massive number of characters included +; in the UTF-16 character set, some special non-terminal symbols have been added: +; +; utf-16-char → includes all UTF-16 characters. +; +; utf-16-char-no-whitespace → includes all UTF-16 characters except for spaces (\s), tabs (\t), newlines (\n) +; and carriage returns (\r). 
+; +; utf-16-char-no-angle-or-colon → includes all UTF-16 characters except for the +; angle-bracket characters (<>) and colon (:). However, +; those characters are valid if an odd number of backslash +; characters is located in front of them. + +minimessage → string {tag string} + +string → utf-16-char* + +tag → "<" tag-name tag-arguments "/>" +tag → "<" tag-name tag-arguments ">" minimessage {""} + +tag-name → identifier + +tag-arguments → "" | named-argument " "+ sequential-argument | named-argument | " "* sequential-argument + +named-argument → "" | " "+ {"!"} identifier {named-argument} | " "+ identifier "=" named-value {named-argument} + +named-value → "" | quoted | no-whitespace-string + +no-whitespace-string → utf-16-char-no-whitespace* + +sequential-argument → ":" sequential-value {sequential-argument} + +sequential-value → "" | quoted | sequential-string + +sequential-string → utf-16-char-no-angle-or-colon* + +quoted → "'" string "'" | """ string """ + +identifier → alphanumeric+ + +alphanumeric → "a" | "b" | "c" | "d" + | "e" | "f" | "g" | "h" + | "i" | "j" | "k" | "l" + | "m" | "n" | "o" | "p" + | "q" | "r" | "s" | "t" + | "u" | "v" | "w" | "x" + | "y" | "z" | "_" | "0" + | "1" | "2" | "3" | "4" + | "5" | "6" | "7" | "8" + | "9" +``` diff --git a/src/utils/shiki/bnf.tmLanguage.json b/src/utils/shiki/bnf.tmLanguage.json new file mode 100644 index 000000000..065e3d9a3 --- /dev/null +++ b/src/utils/shiki/bnf.tmLanguage.json @@ -0,0 +1,82 @@ +{ + "$schema": "https://raw.githubusercontent.com/martinring/tmlanguage/master/tmlanguage.json", + "name": "bnf", + "scopeName": "source.bnf", + "patterns": [{ "include": "#comment" }, { "include": "#rule" }, { "include": "#meta" }, { "include": "#strings" }], + "repository": { + "comment": { + "name": "comment.line.semicolon.bnf", + "match": ";.*$" + }, + + "rule": { + "name": "meta.rule.bnf", + "begin": "^(\\s*)([A-Za-z0-9_-]+)(\\s*→)", + "beginCaptures": { + "2": { "name": "entity.name.function.nonterminal.bnf" }, 
+ "3": { "name": "keyword.reserved.arrow.bnf" } + }, + "end": "(?=^\\s*[A-Za-z0-9_-]+\\s*→|\\Z)", + "patterns": [ + { "include": "#tripleQuotedNonterminal" }, + { "include": "#strings" }, + { "include": "#meta" }, + { + "match": "\\b[A-Za-z0-9_-]+\\b", + "name": "variable.language.nonterminal.bnf" + } + ] + }, + + "tripleQuotedNonterminal": { + "name": "meta.triplequoted.nonterminal.bnf", + "begin": "\"{3}", + "beginCaptures": { + "0": { "name": "string.quoted.double.bnf" } + }, + "end": "\"{3}", + "endCaptures": { + "0": { "name": "string.quoted.double.bnf" } + }, + "patterns": [ + { + "match": "\\b[A-Za-z0-9_-]+\\b", + "name": "variable.language.nonterminal.bnf" + } + ] + }, + + "strings": { + "patterns": [ + { + "name": "string.quoted.double.bnf", + "begin": "\"", + "end": "\"", + "patterns": [{ "match": "\"\"", "name": "constant.character.escape.doublequote.bnf" }] + }, + { + "name": "string.quoted.single.bnf", + "begin": "'", + "end": "'" + } + ] + }, + + "meta": { + "patterns": [ + { + "match": "→", + "name": "keyword.reserved.arrow.bnf" + }, + { + "match": "\\|", + "name": "keyword.reserved.choice.bnf" + }, + { + "match": "[{}()]", + "name": "punctuation.section.group.bnf" + } + ] + } + } +} diff --git a/src/utils/shiki/mm.tmLanguage.json b/src/utils/shiki/mm.tmLanguage.json index 5788d0d86..da17c0396 100644 --- a/src/utils/shiki/mm.tmLanguage.json +++ b/src/utils/shiki/mm.tmLanguage.json @@ -4,44 +4,84 @@ "patterns": [ { "name": "meta.tag.mm", - "begin": "(<)(/?|!?)([a-zA-Z0-9_#]+)", + "begin": "()", + "end": "/?>", "endCaptures": { - "1": { "name": "constant.language.tag.mm" } + "1": { "name": "punctuation.definition.tag.mm" } }, "patterns": [ { - "name": "string.quoted.single.argument.mm", - "match": "(:)'([^']*)'", + "name": "variable.parameter.sequenced.quoted.mm", + "match": "(\\s*)(:)(\".*\")", "captures": { - "1": { "name": "constant.language.tag.mm" }, - "2": { "name": "string.quoted.single.argument.mm" } + "2": { "name": 
"punctuation.definition.identifier.mm" }, + "3": { "name": "string.quoted.double.mm" } } }, { - "name": "string.quoted.double.argument.mm", - "match": "(:)\"([^\"]*)\"", + "name": "variable.parameter.sequenced.single-quoted.mm", + "match": "(\\s*)(:)('.*')", "captures": { - "1": { "name": "constant.language.tag.mm" }, - "2": { "name": "string.quoted.double.argument.mm" } + "2": { "name": "punctuation.definition.identifier.mm" }, + "3": { "name": "string.quoted.single.mm" } } }, { - "name": "variable.language.argument.mm", - "match": "(:)([^:\\s\"'>]+(?:\\[[^\\]]*\\])?)", + "name": "variable.parameter.sequenced.unquoted.mm", + "match": "(\\s*)(:)(((\\\\>)|(\\\\:)|[^>:])*)", "captures": { - "1": { "name": "constant.language.tag.mm" }, - "2": { "name": "variable.language.argument.mm" } + "2": { "name": "punctuation.definition.identifier.mm" }, + "3": { "name": "string.unquoted.mm" } + } + }, + { + "name": "variable.parameter.named-argument.quoted.mm", + "match": "(\\s+)([a-z_0-9]+)(=)(\".*\")", + "captures": { + "2": { "name": "keyword.reserved.identifier.mm" }, + "3": { "name": "punctuation.definition.identifier.mm" }, + "4": { "name": "string.quoted.double.mm" } + } + }, + { + "name": "variable.parameter.named-argument.single-quoted.mm", + "match": "(\\s+)([a-z_0-9]+)(=)('.*')", + "captures": { + "2": { "name": "keyword.reserved.identifier.mm" }, + "3": { "name": "punctuation.definition.identifier.mm" }, + "4": { "name": "string.quoted.single.mm" } + } + }, + { + "name": "variable.parameter.named-argument.unquoted.mm", + "match": "(\\s+)([a-z_0-9]+)(=)([^ >:]*)", + "captures": { + "2": { "name": "keyword.reserved.identifier.mm" }, + "3": { "name": "punctuation.definition.identifier.mm" }, + "4": { "name": "string.unquoted.mm" } + } + }, + { + "name": "variable.parameter.inverse-flag.mm", + "match": "(\\s+)(![a-z_0-9]+)", + "captures": { + "2": { "name": "support.type.inverse-flag.mm" } + } + }, + { + "name": "variable.parameter.flag.mm", + "match": 
"(\\s+)([a-z_0-9]+)", + "captures": { + "2": { "name": "support.type.flag.mm" } } } ] } ], - "repository": {}, - "scopeName": "text.mm" + "scopeName": "text.altmm", + "repository": {} } From e8c574cfb48dd5e67cab4703cc8d2e00672e60f5 Mon Sep 17 00:00:00 2001 From: Strokkur24 Date: Fri, 7 Nov 2025 20:45:38 +0100 Subject: [PATCH 2/5] fix: dash is also a valid identifier character --- .../docs/adventure/minimessage/specification.md | 6 +++--- src/utils/shiki/mm.tmLanguage.json | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/content/docs/adventure/minimessage/specification.md b/src/content/docs/adventure/minimessage/specification.md index c989460a4..1d486918d 100644 --- a/src/content/docs/adventure/minimessage/specification.md +++ b/src/content/docs/adventure/minimessage/specification.md @@ -4,7 +4,7 @@ slug: adventure/minimessage/specification description: A developer-facing specification of the MiniMessage format. tableOfContents: minHeadingLevel: 2 - maxHeadingLevel: 5 + maxHeadingLevel: 4 --- This document outlines the MiniMessage format in detail to aid developers who wish to implement their own MiniMessage @@ -189,7 +189,7 @@ This section defines miscellaneous behavior of common parts. ### Identifiers -All identifiers must be lowercased and contain only alphanumerical characters or `_`. All identifiers +All identifiers must be lowercased and contain only alphanumerical characters, `_`, or '-'. All identifiers used as named argument names should be unique. 
### Quoting @@ -285,5 +285,5 @@ alphanumeric → "a" | "b" | "c" | "d" | "y" | "z" | "_" | "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" - | "9" + | "9" | "-" ``` diff --git a/src/utils/shiki/mm.tmLanguage.json b/src/utils/shiki/mm.tmLanguage.json index da17c0396..6eff13b55 100644 --- a/src/utils/shiki/mm.tmLanguage.json +++ b/src/utils/shiki/mm.tmLanguage.json @@ -4,7 +4,7 @@ "patterns": [ { "name": "meta.tag.mm", - "begin": "(:]*)", + "match": "(\\s+)([a-z_0-9-]+)(=)([^ >:]*)", "captures": { "2": { "name": "keyword.reserved.identifier.mm" }, "3": { "name": "punctuation.definition.identifier.mm" }, @@ -67,14 +67,14 @@ }, { "name": "variable.parameter.inverse-flag.mm", - "match": "(\\s+)(![a-z_0-9]+)", + "match": "(\\s+)(![a-z_0-9-]+)", "captures": { "2": { "name": "support.type.inverse-flag.mm" } } }, { "name": "variable.parameter.flag.mm", - "match": "(\\s+)([a-z_0-9]+)", + "match": "(\\s+)([a-z_0-9-]+)", "captures": { "2": { "name": "support.type.flag.mm" } } From b0fb9faaa62e6d7877b36fe37eae0c959f15cd1b Mon Sep 17 00:00:00 2001 From: Strokkur24 Date: Fri, 7 Nov 2025 20:48:47 +0100 Subject: [PATCH 3/5] fix: spelling --- src/content/docs/adventure/minimessage/specification.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/content/docs/adventure/minimessage/specification.md b/src/content/docs/adventure/minimessage/specification.md index 1d486918d..4394acca1 100644 --- a/src/content/docs/adventure/minimessage/specification.md +++ b/src/content/docs/adventure/minimessage/specification.md @@ -8,7 +8,7 @@ tableOfContents: --- This document outlines the MiniMessage format in detail to aid developers who wish to implement their own MiniMessage -parser from scratch or understand the internal processes happening during the parsing of MiniMesssage formatted strings. +parser from scratch or understand the internal processes happening during the parsing of MiniMessage formatted strings. 
The keywords “MUST”, “MUST NOT”, “REQUIRED”, “SHALL”, “SHALL NOT”, “SHOULD”, “SHOULD NOT”, “RECOMMENDED”, “MAY”, and “OPTIONAL” in this document are to be interpreted as described in @@ -123,7 +123,7 @@ Here is an example for valid valued named arguments: And example for invalid valued named arguments: ```mm - + ``` :::note From bb9a4ebe5aca32d13c285914b8bfcf1be01f070f Mon Sep 17 00:00:00 2001 From: Strokkur24 Date: Fri, 7 Nov 2025 20:49:55 +0100 Subject: [PATCH 4/5] fix: minor issue in mm tmLanguage scopeName --- src/utils/shiki/mm.tmLanguage.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/utils/shiki/mm.tmLanguage.json b/src/utils/shiki/mm.tmLanguage.json index 6eff13b55..05d862b9c 100644 --- a/src/utils/shiki/mm.tmLanguage.json +++ b/src/utils/shiki/mm.tmLanguage.json @@ -82,6 +82,6 @@ ] } ], - "scopeName": "text.altmm", - "repository": {} + "repository": {}, + "scopeName": "text.mm" } From 7e8513ee3f3bc0f5a20fbaad7e22970f56361a2e Mon Sep 17 00:00:00 2001 From: Strokkur24 Date: Fri, 7 Nov 2025 21:35:20 +0100 Subject: [PATCH 5/5] style: make flags and named arguments the same color --- src/utils/shiki/mm.tmLanguage.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/utils/shiki/mm.tmLanguage.json b/src/utils/shiki/mm.tmLanguage.json index 05d862b9c..e7683527e 100644 --- a/src/utils/shiki/mm.tmLanguage.json +++ b/src/utils/shiki/mm.tmLanguage.json @@ -69,14 +69,14 @@ "name": "variable.parameter.inverse-flag.mm", "match": "(\\s+)(![a-z_0-9-]+)", "captures": { - "2": { "name": "support.type.inverse-flag.mm" } + "2": { "name": "keyword.reserved.inverse-flag.mm" } } }, { "name": "variable.parameter.flag.mm", "match": "(\\s+)([a-z_0-9-]+)", "captures": { - "2": { "name": "support.type.flag.mm" } + "2": { "name": "keyword.reserved.flag.mm" } } } ]