// NOTE(review): the regexp literals in the var block below look extraction-damaged — TODO confirm against the upstream lark_table.go. As shown, brTagRE is just `(?i)` (it matches the empty string at every position) and the six capture-group patterns all read `(?is)]*>(.*?)`, so they are identical and match at any '>' instead of at a named tag. Presumably the angle-bracket tag names were stripped; the empty "()" in the NormalizeLarkTables comment looks like the same loss.
diff --git a/shortcuts/doc/docs_fetch.go b/shortcuts/doc/docs_fetch.go index 65a4e890..fc299d36 100644 --- a/shortcuts/doc/docs_fetch.go +++ b/shortcuts/doc/docs_fetch.go @@ -66,6 +66,7 @@ var DocsFetch = common.Shortcut{ fmt.Fprintf(w, "# %s\n\n", title) } if md, ok := result["markdown"].(string); ok { + md = NormalizeLarkTables(md) fmt.Fprintln(w, md) } if hasMore, ok := result["has_more"].(bool); ok && hasMore { diff --git a/shortcuts/doc/lark_table.go b/shortcuts/doc/lark_table.go new file mode 100644 index 00000000..f856e210 --- /dev/null +++ b/shortcuts/doc/lark_table.go @@ -0,0 +1,150 @@ +// Copyright (c) 2026 Lark Technologies Pte. Ltd. +// SPDX-License-Identifier: MIT + +package doc + +import ( + "regexp" + "strings" +) + +var ( + larkTableRE = regexp.MustCompile(`(?is)]*>(.*?)`) + larkTrRE = regexp.MustCompile(`(?is)]*>(.*?)`) + larkTdRE = regexp.MustCompile(`(?is)]*>(.*?)`) + gridRE = regexp.MustCompile(`(?is)]*>(.*?)`) + columnRE = regexp.MustCompile(`(?is)]*>(.*?)`) + textTagRE = regexp.MustCompile(`(?is)]*>(.*?)`) + larkTagRE = regexp.MustCompile(`(?i)]+>`) + brTagRE = regexp.MustCompile(`(?i)`) + headerAttrRE = regexp.MustCompile(`(?i)header-row="false"`) +) + +// NormalizeLarkTables converts Lark-formatted table XML-like structures () +// into standard GitHub-flavored Markdown (GFM) pipe tables. +// If any cell contains a fenced code block (```), it returns the original markup +// to avoid breaking multi-line code rendering in pipe tables. 
// NormalizeLarkTables rewrites every <lark-table>…</lark-table> block found in
// md as a GitHub-flavored Markdown (GFM) pipe table and returns the result.
// Text outside such blocks is returned untouched.
//
// The first row becomes the GFM header, followed by a "---" delimiter row,
// unless the opening <lark-table …> tag carries header-row="false"; in that
// case only plain rows are emitted. Rows shorter than the widest row are
// padded with empty cells.
//
// A block is returned exactly as it was when it has no rows, when none of its
// rows has a cell, or when any cell contains a fenced code block (```),
// because a fence cannot survive being folded onto a single table line.
func NormalizeLarkTables(md string) string {
	return tableBlockRE.ReplaceAllStringFunc(md, convertLarkTable)
}

// convertLarkTable turns one matched <lark-table> block into pipe-table text,
// or returns block unchanged when it cannot be represented (see
// NormalizeLarkTables).
func convertLarkTable(block string) string {
	loc := tableBlockRE.FindStringSubmatchIndex(block)
	if len(loc) < 4 || loc[2] < 0 {
		return block
	}
	// Everything before capture group 1 is the opening tag. The header switch
	// is looked up there only, so cell text that happens to contain
	// header-row="false" cannot turn the header off.
	openTag, inner := block[:loc[2]], block[loc[2]:loc[3]]

	rowMatches := tableRowRE.FindAllStringSubmatch(inner, -1)
	if len(rowMatches) == 0 {
		return block
	}

	rows := make([][]string, 0, len(rowMatches))
	width := 0
	for _, rm := range rowMatches {
		cellMatches := tableCellRE.FindAllStringSubmatch(rm[1], -1)
		cells := make([]string, 0, len(cellMatches))
		for _, cm := range cellMatches {
			// Check the raw cell, before any tag stripping: a fenced code
			// block needs real line breaks, which a pipe-table row cannot hold.
			if strings.Contains(cm[1], "```") {
				return block
			}
			cells = append(cells, cleanCell(cm[1]))
		}
		if len(cells) > width {
			width = len(cells)
		}
		rows = append(rows, cells)
	}
	if width == 0 {
		return block
	}

	hasHeader := !headerOffRE.MatchString(openTag)

	var out strings.Builder
	for i, cells := range rows {
		// Pad short rows so every line has the same number of columns.
		for len(cells) < width {
			cells = append(cells, "")
		}
		if i > 0 {
			out.WriteByte('\n')
		}
		out.WriteString("| " + joinCells(cells) + " |")
		if i == 0 && hasHeader {
			// Delimiter row: "| --- | --- | … |", one "---" per column.
			out.WriteString("\n|" + strings.Repeat(" --- |", width))
		}
	}
	return out.String()
}

// cellEscaper makes cell text safe for a single pipe-table line in one pass:
// line breaks (LF or CRLF) become <br>, and literal pipes are backslash-escaped
// so they are not read as column separators.
var cellEscaper = strings.NewReplacer("\r\n", "<br>", "\n", "<br>", "|", `\|`)

// joinCells escapes each cell and joins the cells with the " | " column
// separator. The caller adds the leading "| " and trailing " |".
func joinCells(cells []string) string {
	escaped := make([]string, len(cells))
	for i, c := range cells {
		escaped[i] = cellEscaper.Replace(c)
	}
	return strings.Join(escaped, " | ")
}

// cleanCell reduces the raw content of one table cell to plain text: grid
// layouts are flattened, <text> wrappers are unwrapped, any remaining
// <lark-…> tags are dropped, <br> variants become "\n", and surrounding
// whitespace is trimmed.
func cleanCell(s string) string {
	s = strings.TrimSpace(s)
	s = flattenGrid(s)
	s = stripTextTags(s)
	s = larkAnyTagRE.ReplaceAllString(s, "")
	s = lineBreakRE.ReplaceAllString(s, "\n")
	return strings.TrimSpace(s)
}

// flattenGrid replaces each <grid>…</grid> layout with the cleaned content of
// its <column> children joined by "\n". A grid without columns is replaced by
// its trimmed inner text. The pass repeats until the string stops changing,
// which also resolves grids nested inside columns.
func flattenGrid(s string) string {
	for {
		next := gridBlockRE.ReplaceAllStringFunc(s, func(grid string) string {
			m := gridBlockRE.FindStringSubmatch(grid)
			if len(m) < 2 {
				return grid
			}
			cols := gridColumnRE.FindAllStringSubmatch(m[1], -1)
			if len(cols) == 0 {
				return strings.TrimSpace(m[1])
			}
			parts := make([]string, 0, len(cols))
			for _, c := range cols {
				parts = append(parts, cleanCell(c[1]))
			}
			return strings.Join(parts, "\n")
		})
		if next == s {
			return s
		}
		s = next
	}
}

// stripTextTags removes <text …>…</text> wrappers while keeping what they
// enclose. It repeats until nothing changes so nested wrappers are unwrapped.
func stripTextTags(s string) string {
	for {
		next := textSpanRE.ReplaceAllString(s, "${1}")
		if next == s {
			return s
		}
		s = next
	}
}

// Patterns used by NormalizeLarkTables and its helpers. All are
// case-insensitive. The paired-tag patterns accept an optional attribute list
// after the tag name, let "." span newlines, and capture lazily so that each
// match ends at the nearest closing tag.
var (
	tableBlockRE = regexp.MustCompile(`(?is)<lark-table(?:\s[^>]*)?>(.*?)</lark-table>`)
	tableRowRE   = regexp.MustCompile(`(?is)<lark-tr(?:\s[^>]*)?>(.*?)</lark-tr>`)
	tableCellRE  = regexp.MustCompile(`(?is)<lark-td(?:\s[^>]*)?>(.*?)</lark-td>`)
	gridBlockRE  = regexp.MustCompile(`(?is)<grid(?:\s[^>]*)?>(.*?)</grid>`)
	gridColumnRE = regexp.MustCompile(`(?is)<column(?:\s[^>]*)?>(.*?)</column>`)
	textSpanRE   = regexp.MustCompile(`(?is)<text(?:\s[^>]*)?>(.*?)</text>`)
	larkAnyTagRE = regexp.MustCompile(`(?i)</?lark-[^>]+>`)
	lineBreakRE  = regexp.MustCompile(`(?i)<br\s*/?>`)
	headerOffRE  = regexp.MustCompile(`(?i)header-row="false"`)
)

// Retained verbatim from the end of the original line (start of the next file in the diff):
// diff --git a/shortcuts/doc/lark_table_test.go b/shortcuts/doc/lark_table_test.go new file mode 100644 index 00000000..dac499d5 --- /dev/null +++ b/shortcuts/doc/lark_table_test.go @@ -0,0 +1,99 @@
// Copyright (c) 2026 Lark Technologies Pte. Ltd.
+// SPDX-License-Identifier: MIT + +package doc + +import ( + "strings" + "testing" +) + +func TestNormalizeLarkTables_Basic(t *testing.T) { + input := `NameAgeAlice30` + + result := NormalizeLarkTables(input) + + if !strings.Contains(result, "| Name | Age |") { + t.Errorf("expected GFM header row, got:\n%s", result) + } + if !strings.Contains(result, "| --- | --- |") { + t.Errorf("expected separator row, got:\n%s", result) + } + if !strings.Contains(result, "| Alice | 30 |") { + t.Errorf("expected data row, got:\n%s", result) + } +} + +func TestNormalizeLarkTables_NoHeaderRow(t *testing.T) { + input := `AB` + + result := NormalizeLarkTables(input) + + if strings.Contains(result, "| --- |") { + t.Errorf("should not have separator when header-row=false, got:\n%s", result) + } + if !strings.Contains(result, "| A | B |") { + t.Errorf("expected data row, got:\n%s", result) + } +} + +func TestNormalizeLarkTables_PreservesCodeBlocks(t *testing.T) { + input := "```go\nfmt.Println()\n```" + + result := NormalizeLarkTables(input) + + if !strings.Contains(result, "") { + t.Errorf("should preserve original lark-table when cells contain fenced code, got:\n%s", result) + } +} + +func TestNormalizeLarkTables_PipeEscape(t *testing.T) { + input := `A|BC` + + result := NormalizeLarkTables(input) + + if !strings.Contains(result, `A\|B`) { + t.Errorf("pipe in cell should be escaped, got:\n%s", result) + } +} + +func TestNormalizeLarkTables_UnevenColumns(t *testing.T) { + input := `ABC` + result := NormalizeLarkTables(input) + if !strings.Contains(result, "| C | |") { + t.Errorf("expected row padding for uneven columns, got:\n%s", result) + } +} + +func TestNormalizeLarkTables_FlattenGrid(t *testing.T) { + input := `Col 1Col 2` + result := NormalizeLarkTables(input) + // Grid columns should be joined with newlines, which in GFM table cells become
+ if !strings.Contains(result, "Col 1
Col 2") { + t.Errorf("expected flattened grid with
, got:\n%s", result) + } +} + +func TestNormalizeLarkTables_StripTextTags(t *testing.T) { + input := `Red Text` + result := NormalizeLarkTables(input) + if !strings.Contains(result, "| Red Text |") { + t.Errorf("expected text tag to be stripped, got:\n%s", result) + } +} + +func TestNormalizeLarkTables_BrTag(t *testing.T) { + input := `Line 1
Line 2
` + result := NormalizeLarkTables(input) + if !strings.Contains(result, "Line 1
Line 2") { + t.Errorf("expected
to be normalized to
, got:\n%s", result) + } +} + +func TestNormalizeLarkTables_NoTable(t *testing.T) { + input := "# Hello\n\nJust some text." + result := NormalizeLarkTables(input) + if result != input { + t.Errorf("should not modify text without lark-table") + } +}
// NOTE(review): the fixtures and expectations above look extraction-damaged rather than authored this way — TODO confirm against the upstream lark_table_test.go.
// As shown, inputs such as `NameAgeAlice30` and `AB` contain no table markup, strings.Contains(result, "") is always true, and several interpreted string literals (the "Col 1 … Col 2" and "Line 1 … Line 2" expectations and their error messages) are split across physical lines, which Go does not allow.
// Presumably angle-bracket tags (the table, row and cell wrappers and a line-break tag) were stripped or rendered; restore them before relying on these tests.