From d5aa057b348c3698db3457314a98bf6aafb723f5 Mon Sep 17 00:00:00 2001 From: flofriday Date: Mon, 25 Aug 2025 19:01:05 +0200 Subject: [PATCH] fix(kotlin) number literals Previously kotlin reused the java number parsing but there are significant differences in how the two languages express numbers causing false-negatives and false-positives. --- CHANGES.md | 1 + src/languages/kotlin.js | 31 ++++++++++++++---- test/markup/kotlin/numbers.expect.txt | 45 +++++++++++++++++++++++++++ test/markup/kotlin/numbers.txt | 45 +++++++++++++++++++++++++++ 4 files changed, 116 insertions(+), 6 deletions(-) create mode 100644 test/markup/kotlin/numbers.expect.txt create mode 100644 test/markup/kotlin/numbers.txt diff --git a/CHANGES.md b/CHANGES.md index b1413c29a5..057f74d9a2 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -26,6 +26,7 @@ Core Grammars: - enh(json) add json5 support [Kerry Shetline][] - fix(css) `unicode-range` parsing, issue #4253 [Kerry Shetline][] - fix(csharp) Support digit separators [te-ing][] +- fix(kotlin) Improve number literal parsing [Florian Freitag][] Documentation: diff --git a/src/languages/kotlin.js b/src/languages/kotlin.js index c31f5a9d49..c63be9b7d5 100644 --- a/src/languages/kotlin.js +++ b/src/languages/kotlin.js @@ -6,8 +6,6 @@ Category: common */ -import { NUMERIC } from "./lib/java.js"; - export default function(hljs) { const KEYWORDS = { keyword: @@ -100,10 +98,31 @@ export default function(hljs) { ] }; - // https://kotlinlang.org/docs/reference/whatsnew11.html#underscores-in-numeric-literals - // According to the doc above, the number mode of kotlin is the same as java 8, - // so the code below is copied from java.js - const KOTLIN_NUMBER_MODE = NUMERIC; + const decimalDigits = '[0-9](_*[0-9])*'; + const frac = `\\.(${decimalDigits})`; + const KOTLIN_NUMBER_MODE = { + className: 'number', + variants: [ + // DecimalFloatingPointLiteral + // including ExponentPart + { begin: `(\\b(${decimalDigits})((${frac})|\\.)?|(${frac}))` + + 
`[eE][+-]?(${decimalDigits})[fF]?\\b` }, + // excluding ExponentPart + { begin: `\\b(${decimalDigits})((${frac})[fF]?\\b|\\.([fF]\\b)?)` }, + { begin: `(${frac})[fF]?\\b` }, + { begin: `\\b(${decimalDigits})[fF]\\b` }, + + // DecimalIntegerLiteral + { begin: '\\b(0|[1-9](_*[0-9])*)[uU]?L?\\b' }, + + // HexIntegerLiteral + { begin: `\\b0[xX]([0-9a-fA-F](_*[0-9a-fA-F])*)[uU]?L?\\b` }, + + // BinaryIntegerLiteral + { begin: '\\b0[bB][01](_*[01])*[uU]?L?\\b' }, + ], + relevance: 0 + }; const KOTLIN_NESTED_COMMENT = hljs.COMMENT( '/\\*', '\\*/', { contains: [ hljs.C_BLOCK_COMMENT_MODE ] } diff --git a/test/markup/kotlin/numbers.expect.txt b/test/markup/kotlin/numbers.expect.txt new file mode 100644 index 0000000000..6036cc062b --- /dev/null +++ b/test/markup/kotlin/numbers.expect.txt @@ -0,0 +1,45 @@ +// Postive cases +1 +123 +123L +123f +123F +123u +123U +123UL +123uL +1.23 +1.23f +1.23F +.123 +.123f +.123F +0b10101 +0B10101 +0b10101u +0B10101u +0b10101U +0B10101U +0b10101L +0B10101L +0b10101uL +0B10101uL +0b10101UL +0B10101UL +0xff8842 +0Xff8842 +0xff8842u +0Xff8842u +0xff8842U +0Xff8842U +0xff8842L +0Xff8842L +0xff8842uL +0Xff8842uL +0xff8842UL +0Xff8842UL + +// Negative cases +123l +123d +123D diff --git a/test/markup/kotlin/numbers.txt b/test/markup/kotlin/numbers.txt new file mode 100644 index 0000000000..75432e9e40 --- /dev/null +++ b/test/markup/kotlin/numbers.txt @@ -0,0 +1,45 @@ +// Postive cases +1 +123 +123L +123f +123F +123u +123U +123UL +123uL +1.23 +1.23f +1.23F +.123 +.123f +.123F +0b10101 +0B10101 +0b10101u +0B10101u +0b10101U +0B10101U +0b10101L +0B10101L +0b10101uL +0B10101uL +0b10101UL +0B10101UL +0xff8842 +0Xff8842 +0xff8842u +0Xff8842u +0xff8842U +0Xff8842U +0xff8842L +0Xff8842L +0xff8842uL +0Xff8842uL +0xff8842UL +0Xff8842UL + +// Negative cases +123l +123d +123D