-
Notifications
You must be signed in to change notification settings - Fork 413
feat(docs): normalize <lark-table> to GFM pipe tables in docs +fetch #260
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
liuhedev
wants to merge
2
commits into
larksuite:main
Choose a base branch
from
liuhedev:feat/docs-fetch-normalize-lark-table
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+250
−0
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,150 @@ | ||
| // Copyright (c) 2026 Lark Technologies Pte. Ltd. | ||
| // SPDX-License-Identifier: MIT | ||
|
|
||
| package doc | ||
|
|
||
| import ( | ||
| "regexp" | ||
| "strings" | ||
| ) | ||
|
|
||
var (
	// larkTableRE matches one <lark-table>...</lark-table> element; group 1 is its body.
	larkTableRE = regexp.MustCompile(`(?is)<lark-table\b[^>]*>(.*?)</lark-table>`)
	// larkTrRE matches one table row; group 1 is the row body.
	larkTrRE = regexp.MustCompile(`(?is)<lark-tr\b[^>]*>(.*?)</lark-tr>`)
	// larkTdRE matches one table cell; group 1 is the raw cell content.
	larkTdRE = regexp.MustCompile(`(?is)<lark-td\b[^>]*>(.*?)</lark-td>`)
	// gridRE matches a <grid> layout container; group 1 is its body.
	gridRE = regexp.MustCompile(`(?is)<grid\b[^>]*>(.*?)</grid>`)
	// columnRE matches one <column> of a grid; group 1 is the column content.
	columnRE = regexp.MustCompile(`(?is)<column\b[^>]*>(.*?)</column>`)
	// textTagRE matches a <text ...> wrapper; group 1 is the wrapped content.
	textTagRE = regexp.MustCompile(`(?is)<text\b[^>]*>(.*?)</text>`)
	// larkTagRE matches any remaining opening or closing <lark-*> tag.
	larkTagRE = regexp.MustCompile(`(?i)</?lark-[^>]+>`)
	// brTagRE matches <br>, <br/> and <br /> in any letter case.
	brTagRE = regexp.MustCompile(`(?i)<br\s*/?>`)
	// headerAttrRE matches the attribute that disables the header row.
	headerAttrRE = regexp.MustCompile(`(?i)header-row="false"`)
	// nestedTableRE detects an opening <lark-table> tag inside a table body.
	nestedTableRE = regexp.MustCompile(`(?i)<lark-table\b`)

	// cellEscaper makes cell text safe for a single pipe-table line: every
	// kind of line break becomes <br> and literal pipes are backslash-escaped.
	// "\r\n" is listed before "\r" so a CRLF pair yields exactly one <br>.
	cellEscaper = strings.NewReplacer("\r\n", "<br>", "\r", "<br>", "\n", "<br>", "|", `\|`)
)

// NormalizeLarkTables converts Lark-formatted table markup (<lark-table>)
// into GitHub-flavored Markdown (GFM) pipe tables and returns the rewritten
// document. Text outside <lark-table> elements is returned untouched.
//
// A table is left as its original markup when it cannot be converted safely:
//   - a cell contains a fenced code block (```), which a single-line pipe
//     table row cannot hold;
//   - the table body contains another <lark-table>, which the non-greedy
//     element match cannot pair with the correct closing tag;
//   - no rows or no cells are found.
//
// Unless the opening tag carries header-row="false", the first row becomes
// the header and is followed by a "---" delimiter row. With
// header-row="false" only plain rows are emitted and no delimiter row is
// written.
func NormalizeLarkTables(md string) string {
	return larkTableRE.ReplaceAllStringFunc(md, func(block string) string {
		m := larkTableRE.FindStringSubmatch(block)
		if len(m) < 2 {
			return block
		}
		inner := m[1]

		// The match ends at the first closing tag, so a nested table would be
		// cut in half; keep the original markup rather than emit a broken table.
		if nestedTableRE.MatchString(inner) {
			return block
		}

		rows, cols, ok := parseLarkTableRows(inner)
		if !ok {
			return block
		}

		// Only the opening tag may switch the header off. Matching against the
		// whole block would let cell text containing header-row="false"
		// suppress the delimiter row. The pattern guarantees that the first
		// '>' terminates the opening tag.
		openTag := block
		if end := strings.IndexByte(block, '>'); end >= 0 {
			openTag = block[:end+1]
		}
		headerRow := !headerAttrRE.MatchString(openTag)

		return renderPipeTable(rows, cols, headerRow)
	})
}

// parseLarkTableRows extracts the cleaned cells of every <lark-tr> in inner.
// It returns the rows padded to a common width, that width, and ok=false when
// the table has no rows, no cells, or a cell holding a fenced code block.
func parseLarkTableRows(inner string) ([][]string, int, bool) {
	rowMatches := larkTrRE.FindAllStringSubmatch(inner, -1)
	if len(rowMatches) == 0 {
		return nil, 0, false
	}

	rows := make([][]string, 0, len(rowMatches))
	cols := 0
	for _, rm := range rowMatches {
		cellMatches := larkTdRE.FindAllStringSubmatch(rm[1], -1)
		cells := make([]string, 0, len(cellMatches))
		for _, cm := range cellMatches {
			raw := cm[1]
			// Inspect the raw content, before any tag stripping, so the
			// fence is detected no matter how it is wrapped.
			if strings.Contains(raw, "```") {
				return nil, 0, false
			}
			cells = append(cells, cleanCell(raw))
		}
		if len(cells) > cols {
			cols = len(cells)
		}
		rows = append(rows, cells)
	}
	if cols == 0 {
		return nil, 0, false
	}

	// Pad short rows so every line of the table has the same column count.
	for i := range rows {
		for len(rows[i]) < cols {
			rows[i] = append(rows[i], "")
		}
	}
	return rows, cols, true
}

// renderPipeTable renders rows (non-empty, each cols wide) as pipe-table
// lines joined by "\n". When headerRow is true the first row is followed by
// a "---" delimiter row.
func renderPipeTable(rows [][]string, cols int, headerRow bool) string {
	lines := make([]string, 0, len(rows)+1)
	body := rows
	if headerRow {
		seps := make([]string, cols)
		for i := range seps {
			seps[i] = "---"
		}
		lines = append(lines, "|"+joinCells(rows[0])+"|", "|"+joinCells(seps)+"|")
		body = rows[1:]
	}
	for _, r := range body {
		lines = append(lines, "|"+joinCells(r)+"|")
	}
	return strings.Join(lines, "\n")
}

// joinCells renders the cells of one row as the interior of a pipe-table
// line: cells are separated by "|", every non-empty cell is padded with one
// space on each side, and an empty cell is a single space, so a padded row
// reads "| C | |" rather than "| C |  |". Line breaks inside a cell become
// <br> and literal pipes are escaped. The caller adds the outer "|" on both
// sides.
func joinCells(cells []string) string {
	parts := make([]string, len(cells))
	for i, c := range cells {
		c = cellEscaper.Replace(c)
		if c == "" {
			parts[i] = " "
			continue
		}
		parts[i] = " " + c + " "
	}
	return strings.Join(parts, "|")
}

// cleanCell reduces raw cell markup to plain cell text: grids are flattened,
// <text> wrappers and leftover <lark-*> tags are removed, <br> variants
// become "\n", and surrounding whitespace is trimmed.
func cleanCell(s string) string {
	s = strings.TrimSpace(s)
	s = flattenGrid(s)
	s = stripTextTags(s)
	s = larkTagRE.ReplaceAllString(s, "")
	s = brTagRE.ReplaceAllString(s, "\n")
	return strings.TrimSpace(s)
}

// flattenGrid replaces each <grid><column>...</column></grid> layout with its
// cleaned columns joined by "\n". A grid without columns is replaced by its
// trimmed body. The pass repeats until the text stops changing so that grids
// exposed by an earlier replacement are handled too.
func flattenGrid(s string) string {
	for {
		next := gridRE.ReplaceAllStringFunc(s, func(g string) string {
			m := gridRE.FindStringSubmatch(g)
			if len(m) < 2 {
				return g
			}
			cols := columnRE.FindAllStringSubmatch(m[1], -1)
			if len(cols) == 0 {
				return strings.TrimSpace(m[1])
			}
			parts := make([]string, 0, len(cols))
			for _, c := range cols {
				parts = append(parts, cleanCell(c[1]))
			}
			return strings.Join(parts, "\n")
		})
		if next == s {
			return s
		}
		s = next
	}
}

// stripTextTags removes <text ...> wrappers while keeping their content. The
// replacement repeats until nothing changes so nested wrappers are unwrapped.
func stripTextTags(s string) string {
	for {
		next := textTagRE.ReplaceAllString(s, "$1")
		if next == s {
			return s
		}
		s = next
	}
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,99 @@ | ||
| // Copyright (c) 2026 Lark Technologies Pte. Ltd. | ||
| // SPDX-License-Identifier: MIT | ||
|
|
||
| package doc | ||
|
|
||
| import ( | ||
| "strings" | ||
| "testing" | ||
| ) | ||
|
|
||
| func TestNormalizeLarkTables_Basic(t *testing.T) { | ||
| input := `<lark-table><lark-tr><lark-td>Name</lark-td><lark-td>Age</lark-td></lark-tr><lark-tr><lark-td>Alice</lark-td><lark-td>30</lark-td></lark-tr></lark-table>` | ||
|
|
||
| result := NormalizeLarkTables(input) | ||
|
|
||
| if !strings.Contains(result, "| Name | Age |") { | ||
| t.Errorf("expected GFM header row, got:\n%s", result) | ||
| } | ||
| if !strings.Contains(result, "| --- | --- |") { | ||
| t.Errorf("expected separator row, got:\n%s", result) | ||
| } | ||
| if !strings.Contains(result, "| Alice | 30 |") { | ||
| t.Errorf("expected data row, got:\n%s", result) | ||
| } | ||
| } | ||
|
|
||
| func TestNormalizeLarkTables_NoHeaderRow(t *testing.T) { | ||
| input := `<lark-table header-row="false"><lark-tr><lark-td>A</lark-td><lark-td>B</lark-td></lark-tr></lark-table>` | ||
|
|
||
| result := NormalizeLarkTables(input) | ||
|
|
||
| if strings.Contains(result, "| --- |") { | ||
| t.Errorf("should not have separator when header-row=false, got:\n%s", result) | ||
| } | ||
| if !strings.Contains(result, "| A | B |") { | ||
| t.Errorf("expected data row, got:\n%s", result) | ||
| } | ||
| } | ||
|
|
||
| func TestNormalizeLarkTables_PreservesCodeBlocks(t *testing.T) { | ||
| input := "<lark-table><lark-tr><lark-td>```go\nfmt.Println()\n```</lark-td></lark-tr></lark-table>" | ||
|
|
||
| result := NormalizeLarkTables(input) | ||
|
|
||
| if !strings.Contains(result, "<lark-table>") { | ||
| t.Errorf("should preserve original lark-table when cells contain fenced code, got:\n%s", result) | ||
| } | ||
| } | ||
|
|
||
| func TestNormalizeLarkTables_PipeEscape(t *testing.T) { | ||
| input := `<lark-table><lark-tr><lark-td>A|B</lark-td><lark-td>C</lark-td></lark-tr></lark-table>` | ||
|
|
||
| result := NormalizeLarkTables(input) | ||
|
|
||
| if !strings.Contains(result, `A\|B`) { | ||
| t.Errorf("pipe in cell should be escaped, got:\n%s", result) | ||
| } | ||
| } | ||
|
|
||
| func TestNormalizeLarkTables_UnevenColumns(t *testing.T) { | ||
| input := `<lark-table><lark-tr><lark-td>A</lark-td><lark-td>B</lark-td></lark-tr><lark-tr><lark-td>C</lark-td></lark-tr></lark-table>` | ||
| result := NormalizeLarkTables(input) | ||
| if !strings.Contains(result, "| C | |") { | ||
| t.Errorf("expected row padding for uneven columns, got:\n%s", result) | ||
| } | ||
| } | ||
|
|
||
| func TestNormalizeLarkTables_FlattenGrid(t *testing.T) { | ||
| input := `<lark-table><lark-tr><lark-td><grid><column>Col 1</column><column>Col 2</column></grid></lark-td></lark-tr></lark-table>` | ||
| result := NormalizeLarkTables(input) | ||
| // Grid columns should be joined with newlines, which in GFM table cells become <br> | ||
| if !strings.Contains(result, "Col 1<br>Col 2") { | ||
| t.Errorf("expected flattened grid with <br>, got:\n%s", result) | ||
| } | ||
| } | ||
|
|
||
| func TestNormalizeLarkTables_StripTextTags(t *testing.T) { | ||
| input := `<lark-table><lark-tr><lark-td><text color="red">Red Text</text></lark-td></lark-tr></lark-table>` | ||
| result := NormalizeLarkTables(input) | ||
| if !strings.Contains(result, "| Red Text |") { | ||
| t.Errorf("expected text tag to be stripped, got:\n%s", result) | ||
| } | ||
| } | ||
|
|
||
| func TestNormalizeLarkTables_BrTag(t *testing.T) { | ||
| input := `<lark-table><lark-tr><lark-td>Line 1<br/>Line 2</lark-td></lark-tr></lark-table>` | ||
| result := NormalizeLarkTables(input) | ||
| if !strings.Contains(result, "Line 1<br>Line 2") { | ||
| t.Errorf("expected <br/> to be normalized to <br>, got:\n%s", result) | ||
| } | ||
| } | ||
|
|
||
| func TestNormalizeLarkTables_NoTable(t *testing.T) { | ||
| input := "# Hello\n\nJust some text." | ||
| result := NormalizeLarkTables(input) | ||
| if result != input { | ||
| t.Errorf("should not modify text without lark-table") | ||
| } | ||
| } |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.