Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions shortcuts/doc/docs_fetch.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ var DocsFetch = common.Shortcut{
fmt.Fprintf(w, "# %s\n\n", title)
}
if md, ok := result["markdown"].(string); ok {
md = NormalizeLarkTables(md)
fmt.Fprintln(w, md)
}
if hasMore, ok := result["has_more"].(bool); ok && hasMore {
Expand Down
150 changes: 150 additions & 0 deletions shortcuts/doc/lark_table.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
// Copyright (c) 2026 Lark Technologies Pte. Ltd.
// SPDX-License-Identifier: MIT

package doc

import (
"regexp"
"strings"
)

// Structural table tags. The (?is) flags make each pattern case-insensitive and
// let "." span newlines; the single capture group holds the element's inner
// content, matched non-greedily up to the first closing tag.
var (
	larkTableRE = regexp.MustCompile(`(?is)<lark-table\b[^>]*>(.*?)</lark-table>`)
	larkTrRE    = regexp.MustCompile(`(?is)<lark-tr\b[^>]*>(.*?)</lark-tr>`)
	larkTdRE    = regexp.MustCompile(`(?is)<lark-td\b[^>]*>(.*?)</lark-td>`)
)

// Layout and inline wrappers that may appear inside a cell. Same flags and
// capture-group convention as the table patterns above.
var (
	gridRE    = regexp.MustCompile(`(?is)<grid\b[^>]*>(.*?)</grid>`)
	columnRE  = regexp.MustCompile(`(?is)<column\b[^>]*>(.*?)</column>`)
	textTagRE = regexp.MustCompile(`(?is)<text\b[^>]*>(.*?)</text>`)
)

// Single-tag patterns (no capture groups), all case-insensitive.
var (
	// larkTagRE matches any opening or closing tag whose name starts with "lark-".
	larkTagRE = regexp.MustCompile(`(?i)</?lark-[^>]+>`)
	// brTagRE matches <br>, <br/> and variants with whitespace before the slash.
	brTagRE = regexp.MustCompile(`(?i)<br\s*/?>`)
	// headerAttrRE matches the literal attribute header-row="false".
	headerAttrRE = regexp.MustCompile(`(?i)header-row="false"`)
)

// NormalizeLarkTables converts Lark-formatted, XML-like table markup
// (<lark-table>) found in md into GitHub-flavored Markdown (GFM) pipe tables.
//
// Every <lark-table>...</lark-table> block is rewritten independently; text
// outside such blocks is left untouched. A block is returned unchanged when it
// contains no <lark-tr> rows, when no row contains a <lark-td> cell, or when
// any cell contains a fenced code block marker (```), because a pipe table
// cannot hold multi-line fenced code.
//
// The first row is emitted as the header followed by a "---" separator row,
// unless the opening <lark-table> tag carries header-row="false", in which
// case all rows are emitted as plain rows with no separator. Rows with fewer
// cells than the widest row are padded with empty cells.
func NormalizeLarkTables(md string) string {
	return larkTableRE.ReplaceAllStringFunc(md, func(block string) string {
		m := larkTableRE.FindStringSubmatch(block)
		if len(m) < 2 {
			return block
		}
		inner := m[1]

		// Inspect only the opening tag for header-row="false". The block always
		// starts with "<lark-table" and the tag pattern forbids ">" inside it,
		// so the first ">" terminates the opening tag. Matching against the
		// whole block would let cell text containing that literal string
		// silently drop the header row.
		openTag := block
		if end := strings.IndexByte(block, '>'); end >= 0 {
			openTag = block[:end+1]
		}
		headerRow := !headerAttrRE.MatchString(openTag)

		rowMatches := larkTrRE.FindAllStringSubmatch(inner, -1)
		if len(rowMatches) == 0 {
			return block
		}

		rows := make([][]string, 0, len(rowMatches))
		maxCols := 0
		for _, rm := range rowMatches {
			cellMatches := larkTdRE.FindAllStringSubmatch(rm[1], -1)
			var cells []string
			for _, cm := range cellMatches {
				rawCell := cm[1]
				// GFM pipe tables cannot contain multi-line fenced code blocks.
				// Check the raw content, before any tag stripping, and keep the
				// original markup for the whole table if a fence is present.
				if strings.Contains(rawCell, "```") {
					return block
				}
				cells = append(cells, cleanCell(rawCell))
			}
			if len(cells) > maxCols {
				maxCols = len(cells)
			}
			rows = append(rows, cells)
		}

		// rows is non-empty here (one entry per matched <lark-tr>), so only the
		// "no cells anywhere" case needs guarding.
		if maxCols == 0 {
			return block
		}

		// Pad short rows so every row has exactly maxCols cells.
		for i := range rows {
			for len(rows[i]) < maxCols {
				rows[i] = append(rows[i], "")
			}
		}

		lines := make([]string, 0, len(rows)+1)
		startBody := 0

		if headerRow {
			lines = append(lines, "| "+joinCells(rows[0])+" |")
			seps := make([]string, maxCols)
			for i := range seps {
				seps[i] = "---"
			}
			lines = append(lines, "| "+strings.Join(seps, " | ")+" |")
			startBody = 1
		}

		for i := startBody; i < len(rows); i++ {
			lines = append(lines, "| "+joinCells(rows[i])+" |")
		}

		return strings.Join(lines, "\n")
	})
}

// joinCells renders the cells of one table row as the text that goes between
// the outer pipes of a GFM row: cells are separated by " | ". Within each cell,
// newlines become <br> and literal pipes are backslash-escaped so they are not
// read as column separators.
func joinCells(cells []string) string {
	// "<br>" contains no pipe, so a single-pass replacer yields the same text
	// as replacing newlines first and pipes second.
	sanitize := strings.NewReplacer("\n", "<br>", "|", "\\|")

	out := make([]string, 0, len(cells))
	for _, cell := range cells {
		out = append(out, sanitize.Replace(cell))
	}
	return strings.Join(out, " | ")
}

// cleanCell reduces the raw inner markup of a table cell to plain cell text.
// In order, it trims surrounding whitespace, flattens <grid> layouts, unwraps
// <text> elements, removes any remaining lark-* tags, turns <br> tags into
// newlines, and trims the result again.
func cleanCell(s string) string {
	flattened := flattenGrid(strings.TrimSpace(s))
	unwrapped := stripTextTags(flattened)
	withoutLarkTags := larkTagRE.ReplaceAllString(unwrapped, "")
	return strings.TrimSpace(brTagRE.ReplaceAllString(withoutLarkTags, "\n"))
}

// flattenGrid replaces every <grid>...</grid> layout in s with its content.
// When the grid holds <column> elements, each column is cleaned with cleanCell
// and the columns are joined with newlines; a grid without columns is replaced
// by its trimmed inner text. The substitution is repeated until a pass leaves
// the string unchanged.
func flattenGrid(s string) string {
	// expand rewrites one matched <grid> element.
	expand := func(grid string) string {
		sub := gridRE.FindStringSubmatch(grid)
		if len(sub) < 2 {
			return grid
		}
		body := sub[1]
		columns := columnRE.FindAllStringSubmatch(body, -1)
		if len(columns) == 0 {
			return strings.TrimSpace(body)
		}
		cleaned := make([]string, len(columns))
		for i, col := range columns {
			cleaned[i] = cleanCell(col[1])
		}
		return strings.Join(cleaned, "\n")
	}

	for {
		next := gridRE.ReplaceAllStringFunc(s, expand)
		if next == s {
			return s
		}
		s = next
	}
}

// stripTextTags unwraps <text ...>...</text> elements, keeping only their inner
// content. It repeats the substitution for as long as a <text> element is still
// present in the string.
func stripTextTags(s string) string {
	// Each substitution removes at least one pair of tags, so the string
	// strictly shrinks and the loop terminates.
	for textTagRE.MatchString(s) {
		s = textTagRE.ReplaceAllString(s, "$1")
	}
	return s
}
99 changes: 99 additions & 0 deletions shortcuts/doc/lark_table_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// Copyright (c) 2026 Lark Technologies Pte. Ltd.
// SPDX-License-Identifier: MIT

package doc

import (
"strings"
"testing"
)

// TestNormalizeLarkTables_Basic checks that a simple two-row table yields a
// header row, a separator row and a data row.
func TestNormalizeLarkTables_Basic(t *testing.T) {
	const input = `<lark-table><lark-tr><lark-td>Name</lark-td><lark-td>Age</lark-td></lark-tr><lark-tr><lark-td>Alice</lark-td><lark-td>30</lark-td></lark-tr></lark-table>`

	got := NormalizeLarkTables(input)

	expectations := []struct {
		substr string
		what   string
	}{
		{"| Name | Age |", "expected GFM header row"},
		{"| --- | --- |", "expected separator row"},
		{"| Alice | 30 |", "expected data row"},
	}
	for _, e := range expectations {
		if !strings.Contains(got, e.substr) {
			t.Errorf("%s, got:\n%s", e.what, got)
		}
	}
}

// TestNormalizeLarkTables_NoHeaderRow checks that header-row="false" on the
// table suppresses the separator row while the row itself is still emitted.
func TestNormalizeLarkTables_NoHeaderRow(t *testing.T) {
	const input = `<lark-table header-row="false"><lark-tr><lark-td>A</lark-td><lark-td>B</lark-td></lark-tr></lark-table>`

	got := NormalizeLarkTables(input)

	hasSeparator := strings.Contains(got, "| --- |")
	hasRow := strings.Contains(got, "| A | B |")

	if hasSeparator {
		t.Errorf("should not have separator when header-row=false, got:\n%s", got)
	}
	if !hasRow {
		t.Errorf("expected data row, got:\n%s", got)
	}
}

// TestNormalizeLarkTables_PreservesCodeBlocks checks that a table whose cell
// holds a fenced code block is left as the original <lark-table> markup.
func TestNormalizeLarkTables_PreservesCodeBlocks(t *testing.T) {
	const fenced = "```go\nfmt.Println()\n```"
	const input = "<lark-table><lark-tr><lark-td>" + fenced + "</lark-td></lark-tr></lark-table>"

	if got := NormalizeLarkTables(input); !strings.Contains(got, "<lark-table>") {
		t.Errorf("should preserve original lark-table when cells contain fenced code, got:\n%s", got)
	}
}

// TestNormalizeLarkTables_PipeEscape checks that a literal pipe inside a cell
// is backslash-escaped in the generated table.
func TestNormalizeLarkTables_PipeEscape(t *testing.T) {
	const input = `<lark-table><lark-tr><lark-td>A|B</lark-td><lark-td>C</lark-td></lark-tr></lark-table>`

	if got := NormalizeLarkTables(input); !strings.Contains(got, `A\|B`) {
		t.Errorf("pipe in cell should be escaped, got:\n%s", got)
	}
}

// TestNormalizeLarkTables_UnevenColumns checks that a row with fewer cells than
// the widest row is padded with an empty trailing cell.
func TestNormalizeLarkTables_UnevenColumns(t *testing.T) {
	input := `<lark-table><lark-tr><lark-td>A</lark-td><lark-td>B</lark-td></lark-tr><lark-tr><lark-td>C</lark-td></lark-tr></lark-table>`
	result := NormalizeLarkTables(input)

	// An empty padded cell sits between the " | " cell separator and the
	// closing " |", so it is rendered with two spaces between the pipes.
	// Collapse whitespace runs before comparing so the assertion checks the
	// cell structure rather than the exact spacing of the empty cell.
	normalized := strings.Join(strings.Fields(result), " ")
	if !strings.Contains(normalized, "| C | |") {
		t.Errorf("expected row padding for uneven columns, got:\n%s", result)
	}
}

// TestNormalizeLarkTables_FlattenGrid checks that <grid>/<column> layouts
// inside a cell are flattened: columns are joined with newlines, which are
// rendered as <br> inside a table cell.
func TestNormalizeLarkTables_FlattenGrid(t *testing.T) {
	const input = `<lark-table><lark-tr><lark-td><grid><column>Col 1</column><column>Col 2</column></grid></lark-td></lark-tr></lark-table>`

	if got := NormalizeLarkTables(input); !strings.Contains(got, "Col 1<br>Col 2") {
		t.Errorf("expected flattened grid with <br>, got:\n%s", got)
	}
}

// TestNormalizeLarkTables_StripTextTags checks that a <text ...> wrapper is
// removed while its content is kept as the cell text.
func TestNormalizeLarkTables_StripTextTags(t *testing.T) {
	const input = `<lark-table><lark-tr><lark-td><text color="red">Red Text</text></lark-td></lark-tr></lark-table>`

	if got := NormalizeLarkTables(input); !strings.Contains(got, "| Red Text |") {
		t.Errorf("expected text tag to be stripped, got:\n%s", got)
	}
}

// TestNormalizeLarkTables_BrTag checks that a self-closing <br/> inside a cell
// ends up as <br> in the generated table row.
func TestNormalizeLarkTables_BrTag(t *testing.T) {
	const input = `<lark-table><lark-tr><lark-td>Line 1<br/>Line 2</lark-td></lark-tr></lark-table>`

	if got := NormalizeLarkTables(input); !strings.Contains(got, "Line 1<br>Line 2") {
		t.Errorf("expected <br/> to be normalized to <br>, got:\n%s", got)
	}
}

// TestNormalizeLarkTables_NoTable checks that Markdown without any
// <lark-table> block is returned unchanged.
func TestNormalizeLarkTables_NoTable(t *testing.T) {
	const input = "# Hello\n\nJust some text."

	if got := NormalizeLarkTables(input); got != input {
		t.Errorf("should not modify text without lark-table")
	}
}
Loading