From 139f46665716a6975bc5487af354134f0517cb6c Mon Sep 17 00:00:00 2001 From: liuhe Date: Fri, 3 Apr 2026 23:24:08 +0800 Subject: [PATCH 1/2] feat(docs): normalize <lark-table> to GFM pipe tables in docs +fetch output When docs +fetch returns markdown containing custom <lark-table> tags, they are not renderable in standard markdown viewers. This change adds automatic conversion of <lark-table> blocks to GFM (GitHub Flavored Markdown) pipe tables during the pretty-print output phase. Features: - Converts <lark-table>/<lark-tr>/<lark-td> to standard | pipe | tables - Handles header-row attribute (defaults to true) - Escapes pipe characters in cell content - Falls back to original <lark-table> when cells contain fenced code blocks - Strips nested <grid>/<column>/<text> tags during conversion Includes unit tests for basic table, no-header, code-block fallback, and pipe escaping scenarios. --- shortcuts/doc/docs_fetch.go | 1 + shortcuts/doc/lark_table.go | 152 +++++++++++++++++++++++++++++++ shortcuts/doc/lark_table_test.go | 63 +++++++++++++ 3 files changed, 216 insertions(+) create mode 100644 shortcuts/doc/lark_table.go create mode 100644 shortcuts/doc/lark_table_test.go diff --git a/shortcuts/doc/docs_fetch.go b/shortcuts/doc/docs_fetch.go index 65a4e890..fc299d36 100644 --- a/shortcuts/doc/docs_fetch.go +++ b/shortcuts/doc/docs_fetch.go @@ -66,6 +66,7 @@ var DocsFetch = common.Shortcut{ fmt.Fprintf(w, "# %s\n\n", title) } if md, ok := result["markdown"].(string); ok { + md = NormalizeLarkTables(md) fmt.Fprintln(w, md) } if hasMore, ok := result["has_more"].(bool); ok && hasMore { diff --git a/shortcuts/doc/lark_table.go b/shortcuts/doc/lark_table.go new file mode 100644 index 00000000..fced2f34 --- /dev/null +++ b/shortcuts/doc/lark_table.go @@ -0,0 +1,152 @@ +// Copyright (c) 2026 Lark Technologies Pte. Ltd. 
+// SPDX-License-Identifier: MIT + +package doc + +import ( + "fmt" + "regexp" + "strings" +) + +var ( + larkTableRE = regexp.MustCompile(`(?is)]*>(.*?)`) + larkTrRE = regexp.MustCompile(`(?is)]*>(.*?)`) + larkTdRE = regexp.MustCompile(`(?is)]*>(.*?)`) + gridRE = regexp.MustCompile(`(?is)]*>(.*?)`) + columnRE = regexp.MustCompile(`(?is)]*>(.*?)`) + textTagRE = regexp.MustCompile(`(?is)]*>(.*?)`) + larkTagRE = regexp.MustCompile(`(?i)]+>`) + brTagRE = regexp.MustCompile(`(?i)`) + headerAttrRE = regexp.MustCompile(`header-row="false"`) +) + +// NormalizeLarkTables converts blocks in markdown to GFM pipe tables. +func NormalizeLarkTables(md string) string { + return larkTableRE.ReplaceAllStringFunc(md, func(block string) string { + m := larkTableRE.FindStringSubmatch(block) + if len(m) < 2 { + return block + } + inner := m[1] + headerRow := !headerAttrRE.MatchString(block) + + rowMatches := larkTrRE.FindAllStringSubmatch(inner, -1) + if len(rowMatches) == 0 { + return block + } + + var rows [][]string + maxCols := 0 + for _, rm := range rowMatches { + cellMatches := larkTdRE.FindAllStringSubmatch(rm[1], -1) + var cells []string + for _, cm := range cellMatches { + cells = append(cells, cleanCell(cm[1])) + } + if len(cells) > maxCols { + maxCols = len(cells) + } + rows = append(rows, cells) + } + + if maxCols == 0 || len(rows) == 0 { + return block + } + + // Normalize column count + for i := range rows { + for len(rows[i]) < maxCols { + rows[i] = append(rows[i], "") + } + } + + // Check if any cell contains fenced code blocks — fall back to original if so + for _, row := range rows { + for _, cell := range row { + if strings.Contains(cell, "```") { + return block // Keep original for complex content + } + } + } + + var lines []string + startBody := 0 + + if headerRow && len(rows) > 0 { + lines = append(lines, "| "+joinCells(rows[0])+" |") + seps := make([]string, maxCols) + for i := range seps { + seps[i] = "---" + } + lines = append(lines, "| "+strings.Join(seps, " | 
")+" |") + startBody = 1 + } + + for i := startBody; i < len(rows); i++ { + lines = append(lines, "| "+joinCells(rows[i])+" |") + } + + return strings.Join(lines, "\n") + }) +} + +func joinCells(cells []string) string { + escaped := make([]string, len(cells)) + for i, c := range cells { + c = strings.ReplaceAll(c, "\n", "
") + c = strings.ReplaceAll(c, "|", "\\|") + escaped[i] = c + } + return strings.Join(escaped, " | ") +} + +func cleanCell(s string) string { + s = strings.TrimSpace(s) + s = flattenGrid(s) + s = stripTextTags(s) + s = larkTagRE.ReplaceAllString(s, "") + s = brTagRE.ReplaceAllString(s, "\n") + return strings.TrimSpace(s) +} + +func flattenGrid(s string) string { + for { + n := gridRE.ReplaceAllStringFunc(s, func(g string) string { + m := gridRE.FindStringSubmatch(g) + if len(m) < 2 { + return g + } + cols := columnRE.FindAllStringSubmatch(m[1], -1) + if len(cols) == 0 { + return strings.TrimSpace(m[1]) + } + var parts []string + for _, c := range cols { + parts = append(parts, cleanCell(c[1])) + } + return strings.Join(parts, "\n") + }) + if n == s { + break + } + s = n + } + return s +} + +func stripTextTags(s string) string { + for { + n := textTagRE.ReplaceAllString(s, "$1") + if n == s { + break + } + s = n + } + return s +} + +func init() { + // Silence unused import warnings + _ = fmt.Sprint +} diff --git a/shortcuts/doc/lark_table_test.go b/shortcuts/doc/lark_table_test.go new file mode 100644 index 00000000..62dcc04f --- /dev/null +++ b/shortcuts/doc/lark_table_test.go @@ -0,0 +1,63 @@ +// Copyright (c) 2026 Lark Technologies Pte. Ltd. 
+// SPDX-License-Identifier: MIT + +package doc + +import ( + "strings" + "testing" +) + +func TestNormalizeLarkTables_Basic(t *testing.T) { + input := `NameAgeAlice30` + + result := NormalizeLarkTables(input) + + if !strings.Contains(result, "| Name | Age |") { + t.Errorf("expected GFM header row, got:\n%s", result) + } + if !strings.Contains(result, "| --- | --- |") { + t.Errorf("expected separator row, got:\n%s", result) + } + if !strings.Contains(result, "| Alice | 30 |") { + t.Errorf("expected data row, got:\n%s", result) + } +} + +func TestNormalizeLarkTables_NoHeaderRow(t *testing.T) { + input := `AB` + + result := NormalizeLarkTables(input) + + if strings.Contains(result, "| --- |") { + t.Errorf("should not have separator when header-row=false, got:\n%s", result) + } +} + +func TestNormalizeLarkTables_PreservesCodeBlocks(t *testing.T) { + input := "```go\nfmt.Println()\n```" + + result := NormalizeLarkTables(input) + + if !strings.Contains(result, "") { + t.Errorf("should preserve original lark-table when cells contain fenced code, got:\n%s", result) + } +} + +func TestNormalizeLarkTables_PipeEscape(t *testing.T) { + input := `A|BC` + + result := NormalizeLarkTables(input) + + if !strings.Contains(result, `A\|B`) { + t.Errorf("pipe in cell should be escaped, got:\n%s", result) + } +} + +func TestNormalizeLarkTables_NoTable(t *testing.T) { + input := "# Hello\n\nJust some text." 
+ result := NormalizeLarkTables(input) + if result != input { + t.Errorf("should not modify text without lark-table") + } +} From f370afc230f2986c3ead7fb407feeaeb9fae99a4 Mon Sep 17 00:00:00 2001 From: liuhe Date: Fri, 3 Apr 2026 23:32:38 +0800 Subject: [PATCH 2/2] chore(docs): address review feedback for lark-table normalization - Refactor NormalizeLarkTables to check for fenced code blocks on raw content - Remove unused fmt import and init() shim - Make headerAttrRE case-insensitive - Add comprehensive unit tests for edge cases: - Uneven column counts (row padding) - Grid/column layout flattening - Text tag stripping - <br> tag normalization - Add positive assertion to TestNormalizeLarkTables_NoHeaderRow - Improve docstring coverage for exported functions --- shortcuts/doc/lark_table.go | 52 +++++++++++++++----------------- shortcuts/doc/lark_table_test.go | 36 ++++++++++++++++++++++ 2 files changed, 61 insertions(+), 27 deletions(-) diff --git a/shortcuts/doc/lark_table.go b/shortcuts/doc/lark_table.go index fced2f34..f856e210 100644 --- a/shortcuts/doc/lark_table.go +++ b/shortcuts/doc/lark_table.go @@ -4,24 +4,26 @@ package doc import ( - "fmt" "regexp" "strings" ) var ( - larkTableRE = regexp.MustCompile(`(?is)]*>(.*?)
`) - larkTrRE = regexp.MustCompile(`(?is)]*>(.*?)`) - larkTdRE = regexp.MustCompile(`(?is)]*>(.*?)`) - gridRE = regexp.MustCompile(`(?is)]*>(.*?)`) - columnRE = regexp.MustCompile(`(?is)]*>(.*?)`) - textTagRE = regexp.MustCompile(`(?is)]*>(.*?)`) - larkTagRE = regexp.MustCompile(`(?i)]+>`) - brTagRE = regexp.MustCompile(`(?i)`) - headerAttrRE = regexp.MustCompile(`header-row="false"`) + larkTableRE = regexp.MustCompile(`(?is)]*>(.*?)
`) + larkTrRE = regexp.MustCompile(`(?is)]*>(.*?)`) + larkTdRE = regexp.MustCompile(`(?is)]*>(.*?)`) + gridRE = regexp.MustCompile(`(?is)]*>(.*?)`) + columnRE = regexp.MustCompile(`(?is)]*>(.*?)`) + textTagRE = regexp.MustCompile(`(?is)]*>(.*?)`) + larkTagRE = regexp.MustCompile(`(?i)]+>`) + brTagRE = regexp.MustCompile(`(?i)`) + headerAttrRE = regexp.MustCompile(`(?i)header-row="false"`) ) -// NormalizeLarkTables converts blocks in markdown to GFM pipe tables. +// NormalizeLarkTables converts Lark-formatted table XML-like structures () +// into standard GitHub-flavored Markdown (GFM) pipe tables. +// If any cell contains a fenced code block (```), it returns the original markup +// to avoid breaking multi-line code rendering in pipe tables. func NormalizeLarkTables(md string) string { return larkTableRE.ReplaceAllStringFunc(md, func(block string) string { m := larkTableRE.FindStringSubmatch(block) @@ -42,7 +44,13 @@ func NormalizeLarkTables(md string) string { cellMatches := larkTdRE.FindAllStringSubmatch(rm[1], -1) var cells []string for _, cm := range cellMatches { - cells = append(cells, cleanCell(cm[1])) + rawCell := cm[1] + // CRITICAL: GFM pipe tables cannot contain multi-line fenced code blocks. + // Check raw content before any tag stripping/cleaning to avoid brittleness. 
+ if strings.Contains(rawCell, "```") { + return block + } + cells = append(cells, cleanCell(rawCell)) } if len(cells) > maxCols { maxCols = len(cells) @@ -54,22 +62,13 @@ func NormalizeLarkTables(md string) string { return block } - // Normalize column count + // Row padding: Ensure all rows have the same number of columns for i := range rows { for len(rows[i]) < maxCols { rows[i] = append(rows[i], "") } } - // Check if any cell contains fenced code blocks — fall back to original if so - for _, row := range rows { - for _, cell := range row { - if strings.Contains(cell, "```") { - return block // Keep original for complex content - } - } - } - var lines []string startBody := 0 @@ -91,6 +90,7 @@ func NormalizeLarkTables(md string) string { }) } +// joinCells joins a slice of strings with the GFM pipe separator, escaping existing pipes. func joinCells(cells []string) string { escaped := make([]string, len(cells)) for i, c := range cells { @@ -101,6 +101,7 @@ func joinCells(cells []string) string { return strings.Join(escaped, " | ") } +// cleanCell strips Lark-specific tags and flattens nested layout structures. func cleanCell(s string) string { s = strings.TrimSpace(s) s = flattenGrid(s) @@ -110,6 +111,7 @@ func cleanCell(s string) string { return strings.TrimSpace(s) } +// flattenGrid handles ... layouts by joining columns with newlines. func flattenGrid(s string) string { for { n := gridRE.ReplaceAllStringFunc(s, func(g string) string { @@ -135,6 +137,7 @@ func flattenGrid(s string) string { return s } +// stripTextTags removes wrappers but preserves their content. 
func stripTextTags(s string) string { for { n := textTagRE.ReplaceAllString(s, "$1") @@ -145,8 +148,3 @@ func stripTextTags(s string) string { } return s } - -func init() { - // Silence unused import warnings - _ = fmt.Sprint -} diff --git a/shortcuts/doc/lark_table_test.go b/shortcuts/doc/lark_table_test.go index 62dcc04f..dac499d5 100644 --- a/shortcuts/doc/lark_table_test.go +++ b/shortcuts/doc/lark_table_test.go @@ -32,6 +32,9 @@ func TestNormalizeLarkTables_NoHeaderRow(t *testing.T) { if strings.Contains(result, "| --- |") { t.Errorf("should not have separator when header-row=false, got:\n%s", result) } + if !strings.Contains(result, "| A | B |") { + t.Errorf("expected data row, got:\n%s", result) + } } func TestNormalizeLarkTables_PreservesCodeBlocks(t *testing.T) { @@ -54,6 +57,39 @@ func TestNormalizeLarkTables_PipeEscape(t *testing.T) { } } +func TestNormalizeLarkTables_UnevenColumns(t *testing.T) { + input := `ABC` + result := NormalizeLarkTables(input) + if !strings.Contains(result, "| C | |") { + t.Errorf("expected row padding for uneven columns, got:\n%s", result) + } +} + +func TestNormalizeLarkTables_FlattenGrid(t *testing.T) { + input := `Col 1Col 2` + result := NormalizeLarkTables(input) + // Grid columns should be joined with newlines, which in GFM table cells become
+ if !strings.Contains(result, "Col 1
Col 2") { + t.Errorf("expected flattened grid with
, got:\n%s", result) + } +} + +func TestNormalizeLarkTables_StripTextTags(t *testing.T) { + input := `Red Text` + result := NormalizeLarkTables(input) + if !strings.Contains(result, "| Red Text |") { + t.Errorf("expected text tag to be stripped, got:\n%s", result) + } +} + +func TestNormalizeLarkTables_BrTag(t *testing.T) { + input := `Line 1
Line 2
` + result := NormalizeLarkTables(input) + if !strings.Contains(result, "Line 1
Line 2") { + t.Errorf("expected
to be normalized to
, got:\n%s", result) + } +} + func TestNormalizeLarkTables_NoTable(t *testing.T) { input := "# Hello\n\nJust some text." result := NormalizeLarkTables(input)