From 3202072ef65ab911f13d0871c304e4789a8b42ea Mon Sep 17 00:00:00 2001
From: iambus <iambus@gmail.com>
Date: Mon, 13 Dec 2021 17:02:34 +0800
Subject: [PATCH 1/8] fix panic: runtime error: slice bounds out of range

---
 diffmatchpatch/diff.go       | 16 ++++------------
 diffmatchpatch/patch_test.go | 25 +++++++++++++++++++++++++
 diffmatchpatch/stringutil.go | 18 ++++--------------
 3 files changed, 33 insertions(+), 26 deletions(-)
diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go
index 2a9f2dc..9b0e4ab 100644
--- a/diffmatchpatch/diff.go
+++ b/diffmatchpatch/diff.go
@@ -34,8 +34,6 @@ const (
 	DiffInsert Operation = 1
 	// DiffEqual item represents an equal diff.
 	DiffEqual Operation = 0
-	//IndexSeparator is used to seperate the array indexes in an index string
-	IndexSeparator = ","
 )
 
 // Diff represents one diff operation
@@ -406,17 +404,11 @@ func (dmp *DiffMatchPatch) DiffLinesToRunes(text1, text2 string) ([]rune, []rune
 func (dmp *DiffMatchPatch) DiffCharsToLines(diffs []Diff, lineArray []string) []Diff {
 	hydrated := make([]Diff, 0, len(diffs))
 	for _, aDiff := range diffs {
-		chars := strings.Split(aDiff.Text, IndexSeparator)
-		text := make([]string, len(chars))
-
-		for i, r := range chars {
-			i1, err := strconv.Atoi(r)
-			if err == nil {
-				text[i] = lineArray[i1]
-			}
+		var sb strings.Builder
+		for _, i := range []rune(aDiff.Text) {
+			sb.WriteString(lineArray[i])
 		}
-
-		aDiff.Text = strings.Join(text, "")
+		aDiff.Text = sb.String()
 		hydrated = append(hydrated, aDiff)
 	}
 	return hydrated
diff --git a/diffmatchpatch/patch_test.go b/diffmatchpatch/patch_test.go
index b019f88..c564f8c 100644
--- a/diffmatchpatch/patch_test.go
+++ b/diffmatchpatch/patch_test.go
@@ -337,3 +337,28 @@ func TestPatchApply(t *testing.T) {
 		assert.Equal(t, tc.ExpectedApplies, actualApplies, fmt.Sprintf("Test case #%d, %s", i, tc.Name))
 	}
 }
+
+func TestPatchMakeOutOfRangePanic(t *testing.T) {
+	text1 := `
+  1111111111111 000000
+  ------------- ------
+  xxxxxxxxxxxxx ------
+  xxxxxxxxxxxxx ------
+  xxxxxxxxxxxxx xxxxxx
+  xxxxxxxxxxxxx ......
+  xxxxxxxxxxxxx 111111
+  xxxxxxxxxxxxx ??????
+  xxxxxxxxxxxxx 333333
+  xxxxxxxxxxxxx 555555
+  xxxxxxxxxx xxxxx
+  xxxxxxxxxx xxxxx
+  xxxxxxxxxx xxxxx
+  xxxxxxxxxx xxxxx
+`
+	text2 := `
+  2222222222222 000000
+  xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx`
+	dmp := New()
+	patches := dmp.PatchMake(text1, text2)
+	assert.Equal(t, 6, len(patches), "TestPatchMakeOutOfRangePanic")
+}
diff --git a/diffmatchpatch/stringutil.go b/diffmatchpatch/stringutil.go
index 44c4359..da0257c 100644
--- a/diffmatchpatch/stringutil.go
+++ b/diffmatchpatch/stringutil.go
@@ -9,7 +9,6 @@
 package diffmatchpatch
 
 import (
-	"strconv"
 	"strings"
 	"unicode/utf8"
 )
@@ -89,18 +88,9 @@ func runesIndex(r1, r2 []rune) int {
 }
 
 func intArrayToString(ns []uint32) string {
-	if len(ns) == 0 {
-		return ""
+	runes := make([]rune, len(ns))
+	for i := 0; i < len(ns); i++ {
+		runes[i] = rune(ns[i])
 	}
-
-	indexSeparator := IndexSeparator[0]
-
-	// Appr. 3 chars per num plus the comma.
-	b := []byte{}
-	for _, n := range ns {
-		b = strconv.AppendInt(b, int64(n), 10)
-		b = append(b, indexSeparator)
-	}
-	b = b[:len(b)-1]
-	return string(b)
+	return string(runes)
 }

From ca1db569574d5f4009033235fb6b9246208b0187 Mon Sep 17 00:00:00 2001
From: moyrn <Chengpin430124>
Date: Tue, 1 Mar 2022 09:18:52 +0800
Subject: [PATCH 2/8] fix bug: CVE-2019-11254 upgrade gopkg.in/yaml.v2

---
 go.mod | 2 +-
 go.sum | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/go.mod b/go.mod
index d46c77d..c731033 100644
--- a/go.mod
+++ b/go.mod
@@ -5,7 +5,7 @@ require (
 	github.com/kr/pretty v0.1.0 // indirect
 	github.com/stretchr/testify v1.4.0
 	gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect
-	gopkg.in/yaml.v2 v2.2.4 // indirect
+	gopkg.in/yaml.v2 v2.4.0 // indirect
 )
 
 go 1.12
diff --git a/go.sum b/go.sum
index 4b80e08..8dd9f36 100644
--- a/go.sum
+++ b/go.sum
@@ -19,3 +19,5 @@ gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
 gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
 gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I=
 gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
+gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=

From 3403a1642ff93712b699ebd52ecf3c53c2b08107 Mon Sep 17 00:00:00 2001
From: Boyu Guo <iambus@gmail.com>
Date: Fri, 23 Sep 2022 10:44:03 +0800
Subject: [PATCH 3/8] update tests

---
 diffmatchpatch/diff_test.go | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/diffmatchpatch/diff_test.go b/diffmatchpatch/diff_test.go
index acb97e3..7c8c9a1 100644
--- a/diffmatchpatch/diff_test.go
+++ b/diffmatchpatch/diff_test.go
@@ -314,10 +314,10 @@ func TestDiffLinesToChars(t *testing.T) {
 	dmp := New()
 
 	for i, tc := range []TestCase{
-		{"", "alpha\r\nbeta\r\n\r\n\r\n", "", "1,2,3,3", []string{"", "alpha\r\n", "beta\r\n", "\r\n"}},
-		{"a", "b", "1", "2", []string{"", "a", "b"}},
+		{"", "alpha\r\nbeta\r\n\r\n\r\n", "", "\x01\x02\x03\x03", []string{"", "alpha\r\n", "beta\r\n", "\r\n"}},
+		{"a", "b", "\x01", "\x02", []string{"", "a", "b"}},
 		// Omit final newline.
-		{"alpha\nbeta\nalpha", "", "1,2,3", "", []string{"", "alpha\n", "beta\n", "alpha"}},
+		{"alpha\nbeta\nalpha", "", "\x01\x02\x03", "", []string{"", "alpha\n", "beta\n", "alpha"}},
 	} {
 		actualChars1, actualChars2, actualLines := dmp.DiffLinesToChars(tc.Text1, tc.Text2)
 		assert.Equal(t, tc.ExpectedChars1, actualChars1, fmt.Sprintf("Test case #%d, %#v", i, tc))
@@ -330,14 +330,14 @@ func TestDiffLinesToChars(t *testing.T) {
 	lineList := []string{
 		"", // Account for the initial empty element of the lines array.
 	}
-	var charList []string
+	var charList []rune
 	for x := 1; x < n+1; x++ {
 		lineList = append(lineList, strconv.Itoa(x)+"\n")
-		charList = append(charList, strconv.Itoa(x))
+		charList = append(charList, rune(x))
 	}
 	lines := strings.Join(lineList, "")
-	chars := strings.Join(charList[:], ",")
-	assert.Equal(t, n, len(strings.Split(chars, ",")))
+	chars := string(charList)
+	assert.Equal(t, n, len(charList))
 
 	actualChars1, actualChars2, actualLines := dmp.DiffLinesToChars(lines, "")
 	assert.Equal(t, chars, actualChars1)
@@ -358,8 +358,8 @@ func TestDiffCharsToLines(t *testing.T) {
 	for i, tc := range []TestCase{
 		{
 			Diffs: []Diff{
-				{DiffEqual, "1,2,1"},
-				{DiffInsert, "2,1,2"},
+				{DiffEqual, "\x01\x02\x01"},
+				{DiffInsert, "\x02\x01\x02"},
 			},
 			Lines: []string{"", "alpha\n", "beta\n"},
 
@@ -378,13 +378,13 @@ func TestDiffCharsToLines(t *testing.T) {
 	lineList := []string{
 		"", // Account for the initial empty element of the lines array.
 	}
-	charList := []string{}
+	charList := []rune{}
 	for x := 1; x <= n; x++ {
 		lineList = append(lineList, strconv.Itoa(x)+"\n")
-		charList = append(charList, strconv.Itoa(x))
+		charList = append(charList, rune(x))
 	}
 	assert.Equal(t, n, len(charList))
-	chars := strings.Join(charList[:], ",")
+	chars := string(charList)
 
 	actual := dmp.DiffCharsToLines([]Diff{Diff{DiffDelete, chars}}, lineList)
 	assert.Equal(t, []Diff{Diff{DiffDelete, strings.Join(lineList, "")}}, actual)

From ff0d34ba8093cd989afb5ead57451452a16df340 Mon Sep 17 00:00:00 2001
From: Boyu Guo <iambus@gmail.com>
Date: Tue, 27 Sep 2022 01:17:47 +0800
Subject: [PATCH 4/8] skip invalid unicode code point to fix
 TestMassiveRuneDiffConversion

---
 diffmatchpatch/diff.go       | 27 ++++++++++++++++-----------
 diffmatchpatch/diff_test.go  | 12 ++++++------
 diffmatchpatch/index.go      | 32 ++++++++++++++++++++++++++++++++
 diffmatchpatch/index_test.go | 16 ++++++++++++++++
 diffmatchpatch/stringutil.go |  8 --------
 5 files changed, 70 insertions(+), 25 deletions(-)
 create mode 100644 diffmatchpatch/index.go
 create mode 100644 diffmatchpatch/index_test.go

diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go
index 9b0e4ab..25cc51f 100644
--- a/diffmatchpatch/diff.go
+++ b/diffmatchpatch/diff.go
@@ -388,16 +388,21 @@ func (dmp *DiffMatchPatch) diffBisectSplit(runes1, runes2 []rune, x, y int,
 }
 
 // DiffLinesToChars splits two texts into a list of strings, and educes the texts to a string of hashes where each Unicode character represents one line.
-// It's slightly faster to call DiffLinesToRunes first, followed by DiffMainRunes.
 func (dmp *DiffMatchPatch) DiffLinesToChars(text1, text2 string) (string, string, []string) {
-	chars1, chars2, lineArray := dmp.diffLinesToStrings(text1, text2)
-	return chars1, chars2, lineArray
+	chars1, chars2, lineArray := dmp.diffLinesToIndexes(text1, text2)
+	return indexesToString(chars1), indexesToString(chars2), lineArray
 }
 
 // DiffLinesToRunes splits two texts into a list of runes.
 func (dmp *DiffMatchPatch) DiffLinesToRunes(text1, text2 string) ([]rune, []rune, []string) {
+	chars1, chars2, lineArray := dmp.diffLinesToIndexes(text1, text2)
+	return []rune(indexesToString(chars1)), []rune(indexesToString(chars2)), lineArray
+}
+
+// diffLinesToIndexes splits two texts into a list of indexes
+func (dmp *DiffMatchPatch) diffLinesToIndexes(text1, text2 string) ([]index, []index, []string) {
 	chars1, chars2, lineArray := dmp.diffLinesToStrings(text1, text2)
-	return []rune(chars1), []rune(chars2), lineArray
+	return chars1, chars2, lineArray
 }
 
 // DiffCharsToLines rehydrates the text in a diff from a string of line hashes to real lines of text.
@@ -405,7 +410,7 @@ func (dmp *DiffMatchPatch) DiffCharsToLines(diffs []Diff, lineArray []string) []
 	hydrated := make([]Diff, 0, len(diffs))
 	for _, aDiff := range diffs {
 		var sb strings.Builder
-		for _, i := range []rune(aDiff.Text) {
+		for _, i := range stringToIndex(aDiff.Text) {
 			sb.WriteString(lineArray[i])
 		}
 		aDiff.Text = sb.String()
@@ -1301,7 +1306,7 @@ func (dmp *DiffMatchPatch) DiffFromDelta(text1 string, delta string) (diffs []Di
 }
 
 // diffLinesToStrings splits two texts into a list of strings. Each string represents one line.
-func (dmp *DiffMatchPatch) diffLinesToStrings(text1, text2 string) (string, string, []string) {
+func (dmp *DiffMatchPatch) diffLinesToStrings(text1, text2 string) ([]index, []index, []string) {
 	// '\x00' is a valid character, but various debuggers don't like it. So we'll insert a junk entry to avoid generating a null character.
 	lineArray := []string{""} // e.g. lineArray[4] == 'Hello\n'
 
@@ -1309,16 +1314,16 @@ func (dmp *DiffMatchPatch) diffLinesToStrings(text1, text2 string) (string, stri
 	strIndexArray1 := dmp.diffLinesToStringsMunge(text1, &lineArray)
 	strIndexArray2 := dmp.diffLinesToStringsMunge(text2, &lineArray)
 
-	return intArrayToString(strIndexArray1), intArrayToString(strIndexArray2), lineArray
+	return strIndexArray1, strIndexArray2, lineArray
 }
 
 // diffLinesToStringsMunge splits a text into an array of strings, and reduces the texts to a []string.
-func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string) []uint32 {
+func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string) []index {
 	// Walk the text, pulling out a substring for each line. text.split('\n') would would temporarily double our memory footprint. Modifying text would create many large strings to garbage collect.
 	lineHash := map[string]int{} // e.g. lineHash['Hello\n'] == 4
 	lineStart := 0
 	lineEnd := -1
-	strs := []uint32{}
+	strs := []index{}
 
 	for lineEnd < len(text)-1 {
 		lineEnd = indexOf(text, "\n", lineStart)
@@ -1332,11 +1337,11 @@ func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]str
 		lineValue, ok := lineHash[line]
 
 		if ok {
-			strs = append(strs, uint32(lineValue))
+			strs = append(strs, index(lineValue))
 		} else {
 			*lineArray = append(*lineArray, line)
 			lineHash[line] = len(*lineArray) - 1
-			strs = append(strs, uint32(len(*lineArray)-1))
+			strs = append(strs, index(len(*lineArray)-1))
 		}
 	}
 
diff --git a/diffmatchpatch/diff_test.go b/diffmatchpatch/diff_test.go
index 7c8c9a1..7955b1f 100644
--- a/diffmatchpatch/diff_test.go
+++ b/diffmatchpatch/diff_test.go
@@ -330,13 +330,13 @@ func TestDiffLinesToChars(t *testing.T) {
 	lineList := []string{
 		"", // Account for the initial empty element of the lines array.
 	}
-	var charList []rune
+	var charList []index
 	for x := 1; x < n+1; x++ {
 		lineList = append(lineList, strconv.Itoa(x)+"\n")
-		charList = append(charList, rune(x))
+		charList = append(charList, index(x))
 	}
 	lines := strings.Join(lineList, "")
-	chars := string(charList)
+	chars := indexesToString(charList)
 	assert.Equal(t, n, len(charList))
 
 	actualChars1, actualChars2, actualLines := dmp.DiffLinesToChars(lines, "")
@@ -378,13 +378,13 @@ func TestDiffCharsToLines(t *testing.T) {
 	lineList := []string{
 		"", // Account for the initial empty element of the lines array.
 	}
-	charList := []rune{}
+	charList := []index{}
 	for x := 1; x <= n; x++ {
 		lineList = append(lineList, strconv.Itoa(x)+"\n")
-		charList = append(charList, rune(x))
+		charList = append(charList, index(x))
 	}
 	assert.Equal(t, n, len(charList))
-	chars := string(charList)
+	chars := indexesToString(charList)
 
 	actual := dmp.DiffCharsToLines([]Diff{Diff{DiffDelete, chars}}, lineList)
 	assert.Equal(t, []Diff{Diff{DiffDelete, strings.Join(lineList, "")}}, actual)
diff --git a/diffmatchpatch/index.go b/diffmatchpatch/index.go
new file mode 100644
index 0000000..965a1c6
--- /dev/null
+++ b/diffmatchpatch/index.go
@@ -0,0 +1,32 @@
+package diffmatchpatch
+
+type index uint32
+
+const runeSkipStart = 0xd800
+const runeSkipEnd = 0xdfff + 1
+const runeMax = 0x110000 // next invalid code point
+
+func stringToIndex(text string) []index {
+	runes := []rune(text)
+	indexes := make([]index, len(runes))
+	for i, r := range runes {
+		if r < runeSkipEnd {
+			indexes[i] = index(r)
+		} else {
+			indexes[i] = index(r) - (runeSkipEnd - runeSkipStart)
+		}
+	}
+	return indexes
+}
+
+func indexesToString(indexes []index) string {
+	runes := make([]rune, len(indexes))
+	for i, index := range indexes {
+		if index < runeSkipStart {
+			runes[i] = rune(index)
+		} else {
+			runes[i] = rune(index + (runeSkipEnd - runeSkipStart))
+		}
+	}
+	return string(runes)
+}
diff --git a/diffmatchpatch/index_test.go b/diffmatchpatch/index_test.go
new file mode 100644
index 0000000..6f1d982
--- /dev/null
+++ b/diffmatchpatch/index_test.go
@@ -0,0 +1,16 @@
+package diffmatchpatch
+
+import (
+	"github.com/stretchr/testify/assert"
+	"testing"
+)
+
+func TestIndexConversion(t *testing.T) {
+	n := runeMax - (runeSkipEnd - runeSkipStart)
+	indexes := make([]index, n)
+	for i := 0; i < n; i++ {
+		indexes[i] = index(i)
+	}
+	indexes2 := stringToIndex(indexesToString(indexes))
+	assert.EqualValues(t, indexes, indexes2)
+}
diff --git a/diffmatchpatch/stringutil.go b/diffmatchpatch/stringutil.go
index da0257c..265f29c 100644
--- a/diffmatchpatch/stringutil.go
+++ b/diffmatchpatch/stringutil.go
@@ -86,11 +86,3 @@ func runesIndex(r1, r2 []rune) int {
 	}
 	return -1
 }
-
-func intArrayToString(ns []uint32) string {
-	runes := make([]rune, len(ns))
-	for i := 0; i < len(ns); i++ {
-		runes[i] = rune(ns[i])
-	}
-	return string(runes)
-}

From 6dbe13c91e8e382b53e2cc8e8146fa6172fefec4 Mon Sep 17 00:00:00 2001
From: nrnrk <noriki6t@gmail.com>
Date: Sun, 16 Oct 2022 01:17:33 +0900
Subject: [PATCH 5/8] fix: use common lineHash to share indice between text1
 and text2

Use common cache of line contents between two texts in `DiffLinesToChars` to get line diffs correctly.
In some cases, line diffs cannot be retrieved correctly in the standard way (https://github.com/google/diff-match-patch/wiki/Line-or-Word-Diffs#line-mode).
In the below case, we failed to get line diffs correctly before this fix.

```go:main.go
package main

import (
	"fmt"

	"github.com/sergi/go-diff/diffmatchpatch"
)

const (
	text1 = `hoge:
  step11:
  - arrayitem1
  - arrayitem2
  step12:
    step21: hoge
    step22: -93
fuga: flatitem
`
	text2 = `hoge:
  step11:
  - arrayitem4
  - arrayitem2
  - arrayitem3
  step12:
    step21: hoge
    step22: -92
fuga: flatitem
`
)

func main() {
	dmp := diffmatchpatch.New()
	a, b, c := dmp.DiffLinesToChars(text1, text2)
	diffs := dmp.DiffMain(a, b, false)
	diffs = dmp.DiffCharsToLines(diffs, c)
	// diffs = dmp.DiffCleanupSemantic(diffs)
	fmt.Println(diffs)
}
```

```text:output
[{Insert hoge:
  step11:
hoge:
} {Equal hoge:
} {Insert hoge:
} {Equal   step11:
} {Insert hoge:
} {Equal   - arrayitem1
} {Insert hoge:
} {Equal   - arrayitem2
} {Insert hoge:
} {Equal   step12:
} {Insert hoge:
} {Equal     step21: hoge
} {Insert hoge:
} {Equal     step22: -93
} {Delete fuga: flatitem
}]
```

Note: This fix corresponds to a javascript implementation.
(ref: https://github.com/google/diff-match-patch/blob/62f2e689f498f9c92dbc588c58750addec9b1654/javascript/diff_match_patch_uncompressed.js#L466)
---
 diffmatchpatch/diff.go      | 8 ++++----
 diffmatchpatch/diff_test.go | 2 ++
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go
index 2a9f2dc..4f7b424 100644
--- a/diffmatchpatch/diff.go
+++ b/diffmatchpatch/diff.go
@@ -1313,17 +1313,17 @@ func (dmp *DiffMatchPatch) diffLinesToStrings(text1, text2 string) (string, stri
 	// '\x00' is a valid character, but various debuggers don't like it. So we'll insert a junk entry to avoid generating a null character.
 	lineArray := []string{""} // e.g. lineArray[4] == 'Hello\n'
 
+	lineHash := make(map[string]int)
 	//Each string has the index of lineArray which it points to
-	strIndexArray1 := dmp.diffLinesToStringsMunge(text1, &lineArray)
-	strIndexArray2 := dmp.diffLinesToStringsMunge(text2, &lineArray)
+	strIndexArray1 := dmp.diffLinesToStringsMunge(text1, &lineArray, lineHash)
+	strIndexArray2 := dmp.diffLinesToStringsMunge(text2, &lineArray, lineHash)
 
 	return intArrayToString(strIndexArray1), intArrayToString(strIndexArray2), lineArray
 }
 
 // diffLinesToStringsMunge splits a text into an array of strings, and reduces the texts to a []string.
-func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string) []uint32 {
+func (dmp *DiffMatchPatch) diffLinesToStringsMunge(text string, lineArray *[]string, lineHash map[string]int) []uint32 {
 	// Walk the text, pulling out a substring for each line. text.split('\n') would would temporarily double our memory footprint. Modifying text would create many large strings to garbage collect.
-	lineHash := map[string]int{} // e.g. lineHash['Hello\n'] == 4
 	lineStart := 0
 	lineEnd := -1
 	strs := []uint32{}
diff --git a/diffmatchpatch/diff_test.go b/diffmatchpatch/diff_test.go
index acb97e3..d6fed50 100644
--- a/diffmatchpatch/diff_test.go
+++ b/diffmatchpatch/diff_test.go
@@ -318,6 +318,8 @@ func TestDiffLinesToChars(t *testing.T) {
 		{"a", "b", "1", "2", []string{"", "a", "b"}},
 		// Omit final newline.
 		{"alpha\nbeta\nalpha", "", "1,2,3", "", []string{"", "alpha\n", "beta\n", "alpha"}},
+		// Same lines in Text1 and Text2
+		{"abc\ndefg\n12345\n", "abc\ndef\n12345\n678", "1,2,3", "1,4,3,5", []string{"", "abc\n", "defg\n", "12345\n", "def\n", "678"}},
 	} {
 		actualChars1, actualChars2, actualLines := dmp.DiffLinesToChars(tc.Text1, tc.Text2)
 		assert.Equal(t, tc.ExpectedChars1, actualChars1, fmt.Sprintf("Test case #%d, %#v", i, tc))

From 74798f5aa5d5c9720e7ff8895fee08e19db05cc7 Mon Sep 17 00:00:00 2001
From: Darkhan Kubigenov <darkhanu@gmail.com>
Date: Sun, 22 Jan 2023 01:50:45 +0000
Subject: [PATCH 6/8] Add a failing line diff test case

Current implementation produces wrong result because
it calls `DiffMain` on the following 2 arguments:

* `1,2,3,4,5,6,7,8,9,10`
* `1,2,3,4,5,6,7,8,9,11`

This numbers represent indices into the lines array.
The algorithm finds that equal part of those
strings is `1,2,3,4,5,6,7,8,9,1` and which is followed
by `Delete 0` and `Insert `1`.
---
 diffmatchpatch/diff_test.go | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/diffmatchpatch/diff_test.go b/diffmatchpatch/diff_test.go
index d6fed50..95a7c8d 100644
--- a/diffmatchpatch/diff_test.go
+++ b/diffmatchpatch/diff_test.go
@@ -1468,6 +1468,38 @@ func TestMassiveRuneDiffConversion(t *testing.T) {
 	assert.NotEmpty(t, diffs)
 }
 
+func TestDiffPartialLineIndex(t *testing.T) {
+	dmp := New()
+	t1, t2, tt := dmp.DiffLinesToChars(
+`line 1
+line 2
+line 3
+line 4
+line 5
+line 6
+line 7
+line 8
+line 9
+line 10 text1`,
+`line 1
+line 2
+line 3
+line 4
+line 5
+line 6
+line 7
+line 8
+line 9
+line 10 text2`)
+	diffs := dmp.DiffMain(t1, t2, false)
+	diffs = dmp.DiffCharsToLines(diffs, tt)
+	assert.Equal(t, []Diff{
+				Diff{DiffEqual, "line 1\nline 2\nline 3\nline 4\nline 5\nline 6\nline 7\nline 8\nline 9\n"},
+				Diff{DiffDelete, "line 10 text1"},
+				Diff{DiffInsert, "line 10 text2"},
+			}, diffs)
+}
+
 func BenchmarkDiffMain(bench *testing.B) {
 	s1 := "`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"
 	s2 := "I am the very model of a modern major general,\nI've information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"

From a674b3095807a3679f154c5a4a6df738aeffdfdf Mon Sep 17 00:00:00 2001
From: Darkhan Kubigenov <darkhanu@gmail.com>
Date: Sat, 21 Jan 2023 23:12:39 +0000
Subject: [PATCH 7/8] Fix line diff by using runes without separators

[The suggested approach](https://github.com/google/diff-match-patch/wiki/Line-or-Word-Diffs#line-mode
) for doing line level diffing is the following set of steps:

1. `ti1, ti2, linesIdx = DiffLinesToChars(t1, t2)`
2. `diffs = DiffMain(ti1, ti2)`
3. `DiffCharsToLines(diff, linesIdx)`

The original implementation in `google/diff-match-patch` uses
unicode codepoints for storing indices in `ti1` and `ti2` joined by an empty string.
Current implementation in this repo stores them as integers joined by a
comma. While this implementation makes `ti1` and `ti2` more readable, it
introduces bugs when trying to rely on it when doing line level diffing
with `DiffMain`. The root cause of the issue is that an integer line
index might span more than one character/rune, and `DiffMain` can assume
that two different lines having the same index prefix match partially. For
example, indices 123 and 129 will have partial match `12`. In that
example, the diff will show lines 3 and 9 which is not correct. A simple
failing test case demonstrating this issue is available at
`TestDiffPartialLineIndex`.

In this PR I am adjusting the algorithm to use the same approach as in
[diff-match-patch](https://github.com/google/diff-match-patch/blob/62f2e689f498f9c92dbc588c58750addec9b1654/javascript/diff_match_patch_uncompressed.js#L508-L510
) by storing each line index as a rune.
While a rune in Golang is a type alias to uint32, not every uint32
can be a valid rune. During string to rune slice conversion invalid runes will
be replaced with `utf.RuneError`.

The integer to rune generation logic is based on the table in https://en.wikipedia.org/wiki/UTF-8#Encoding

The first 127 lines will work the fastest as they are represented as a
single bytes. Higher numbers are represented as 2-4 bytes.

In addition to that, the range `U+D800 - U+DFFF` contains
[invalid codepoints](https://en.wikipedia.org/wiki/UTF-8#Invalid_sequences_and_error_handling).
and all codepoints higher or equal to `0xD800` are incremented by
`0xDFFF - 0xD800`.

The maximum representable integer using this approach is 1'112'060.
This improves on Javascript implementation which currently
[bails out](https://github.com/google/diff-match-patch/blob/62f2e689f498f9c92dbc588c58750addec9b1654/javascript/diff_match_patch_uncompressed.js#L503-L505
) when files have more than 65535 lines.
---
 diffmatchpatch/diff.go            |  13 ++--
 diffmatchpatch/diff_test.go       |  38 ++++++------
 diffmatchpatch/stringutil.go      | 100 +++++++++++++++++++++++++++---
 diffmatchpatch/stringutil_test.go |  18 ++++++
 go.mod                            |   2 +-
 5 files changed, 133 insertions(+), 38 deletions(-)

diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go
index 4f7b424..915d509 100644
--- a/diffmatchpatch/diff.go
+++ b/diffmatchpatch/diff.go
@@ -34,8 +34,6 @@ const (
 	DiffInsert Operation = 1
 	// DiffEqual item represents an equal diff.
 	DiffEqual Operation = 0
-	//IndexSeparator is used to seperate the array indexes in an index string
-	IndexSeparator = ","
 )
 
 // Diff represents one diff operation
@@ -406,14 +404,11 @@ func (dmp *DiffMatchPatch) DiffLinesToRunes(text1, text2 string) ([]rune, []rune
 func (dmp *DiffMatchPatch) DiffCharsToLines(diffs []Diff, lineArray []string) []Diff {
 	hydrated := make([]Diff, 0, len(diffs))
 	for _, aDiff := range diffs {
-		chars := strings.Split(aDiff.Text, IndexSeparator)
-		text := make([]string, len(chars))
+		runes := []rune(aDiff.Text)
+		text := make([]string, len(runes))
 
-		for i, r := range chars {
-			i1, err := strconv.Atoi(r)
-			if err == nil {
-				text[i] = lineArray[i1]
-			}
+		for i, r := range runes {
+			text[i] = lineArray[runeToInt(r)]
 		}
 
 		aDiff.Text = strings.Join(text, "")
diff --git a/diffmatchpatch/diff_test.go b/diffmatchpatch/diff_test.go
index 95a7c8d..4532b79 100644
--- a/diffmatchpatch/diff_test.go
+++ b/diffmatchpatch/diff_test.go
@@ -314,12 +314,12 @@ func TestDiffLinesToChars(t *testing.T) {
 	dmp := New()
 
 	for i, tc := range []TestCase{
-		{"", "alpha\r\nbeta\r\n\r\n\r\n", "", "1,2,3,3", []string{"", "alpha\r\n", "beta\r\n", "\r\n"}},
-		{"a", "b", "1", "2", []string{"", "a", "b"}},
+		{"", "alpha\r\nbeta\r\n\r\n\r\n", "", "\x01\x02\x03\x03", []string{"", "alpha\r\n", "beta\r\n", "\r\n"}},
+		{"a", "b", "\x01", "\x02", []string{"", "a", "b"}},
 		// Omit final newline.
-		{"alpha\nbeta\nalpha", "", "1,2,3", "", []string{"", "alpha\n", "beta\n", "alpha"}},
+		{"alpha\nbeta\nalpha", "", "\x01\x02\x03", "", []string{"", "alpha\n", "beta\n", "alpha"}},
 		// Same lines in Text1 and Text2
-		{"abc\ndefg\n12345\n", "abc\ndef\n12345\n678", "1,2,3", "1,4,3,5", []string{"", "abc\n", "defg\n", "12345\n", "def\n", "678"}},
+		{"abc\ndefg\n12345\n", "abc\ndef\n12345\n678", "\x01\x02\x03", "\x01\x04\x03\x05", []string{"", "abc\n", "defg\n", "12345\n", "def\n", "678"}},
 	} {
 		actualChars1, actualChars2, actualLines := dmp.DiffLinesToChars(tc.Text1, tc.Text2)
 		assert.Equal(t, tc.ExpectedChars1, actualChars1, fmt.Sprintf("Test case #%d, %#v", i, tc))
@@ -332,14 +332,13 @@ func TestDiffLinesToChars(t *testing.T) {
 	lineList := []string{
 		"", // Account for the initial empty element of the lines array.
 	}
-	var charList []string
+	var charList []rune
 	for x := 1; x < n+1; x++ {
 		lineList = append(lineList, strconv.Itoa(x)+"\n")
-		charList = append(charList, strconv.Itoa(x))
+		charList = append(charList, rune(x))
 	}
 	lines := strings.Join(lineList, "")
-	chars := strings.Join(charList[:], ",")
-	assert.Equal(t, n, len(strings.Split(chars, ",")))
+	chars := string(charList)
 
 	actualChars1, actualChars2, actualLines := dmp.DiffLinesToChars(lines, "")
 	assert.Equal(t, chars, actualChars1)
@@ -360,8 +359,8 @@ func TestDiffCharsToLines(t *testing.T) {
 	for i, tc := range []TestCase{
 		{
 			Diffs: []Diff{
-				{DiffEqual, "1,2,1"},
-				{DiffInsert, "2,1,2"},
+				{DiffEqual, "\x01\x02\x01"},
+				{DiffInsert, "\x02\x01\x02"},
 			},
 			Lines: []string{"", "alpha\n", "beta\n"},
 
@@ -380,13 +379,12 @@ func TestDiffCharsToLines(t *testing.T) {
 	lineList := []string{
 		"", // Account for the initial empty element of the lines array.
 	}
-	charList := []string{}
+	charList := []rune{}
 	for x := 1; x <= n; x++ {
 		lineList = append(lineList, strconv.Itoa(x)+"\n")
-		charList = append(charList, strconv.Itoa(x))
+		charList = append(charList, rune(x))
 	}
-	assert.Equal(t, n, len(charList))
-	chars := strings.Join(charList[:], ",")
+	chars := string(charList)
 
 	actual := dmp.DiffCharsToLines([]Diff{Diff{DiffDelete, chars}}, lineList)
 	assert.Equal(t, []Diff{Diff{DiffDelete, strings.Join(lineList, "")}}, actual)
@@ -1471,7 +1469,7 @@ func TestMassiveRuneDiffConversion(t *testing.T) {
 func TestDiffPartialLineIndex(t *testing.T) {
 	dmp := New()
 	t1, t2, tt := dmp.DiffLinesToChars(
-`line 1
+		`line 1
 line 2
 line 3
 line 4
@@ -1481,7 +1479,7 @@ line 7
 line 8
 line 9
 line 10 text1`,
-`line 1
+		`line 1
 line 2
 line 3
 line 4
@@ -1494,10 +1492,10 @@ line 10 text2`)
 	diffs := dmp.DiffMain(t1, t2, false)
 	diffs = dmp.DiffCharsToLines(diffs, tt)
 	assert.Equal(t, []Diff{
-				Diff{DiffEqual, "line 1\nline 2\nline 3\nline 4\nline 5\nline 6\nline 7\nline 8\nline 9\n"},
-				Diff{DiffDelete, "line 10 text1"},
-				Diff{DiffInsert, "line 10 text2"},
-			}, diffs)
+		Diff{DiffEqual, "line 1\nline 2\nline 3\nline 4\nline 5\nline 6\nline 7\nline 8\nline 9\n"},
+		Diff{DiffDelete, "line 10 text1"},
+		Diff{DiffInsert, "line 10 text2"},
+	}, diffs)
 }
 
 func BenchmarkDiffMain(bench *testing.B) {
diff --git a/diffmatchpatch/stringutil.go b/diffmatchpatch/stringutil.go
index 44c4359..eb727bb 100644
--- a/diffmatchpatch/stringutil.go
+++ b/diffmatchpatch/stringutil.go
@@ -9,11 +9,16 @@
 package diffmatchpatch
 
 import (
-	"strconv"
+	"fmt"
 	"strings"
 	"unicode/utf8"
 )
 
+const UNICODE_INVALID_RANGE_START = 0xD800
+const UNICODE_INVALID_RANGE_END = 0xDFFF
+const UNICODE_INVALID_RANGE_DELTA = UNICODE_INVALID_RANGE_END - UNICODE_INVALID_RANGE_START + 1
+const UNICODE_RANGE_MAX = 0x10FFFF
+
 // unescaper unescapes selected chars for compatibility with JavaScript's encodeURI.
 // In speed critical applications this could be dropped since the receiving application will certainly decode these fine. Note that this function is case-sensitive.  Thus "%3F" would not be unescaped.  But this is ok because it is only called with the output of HttpUtility.UrlEncode which returns lowercase hex. Example: "%3f" -> "?", "%24" -> "$", etc.
 var unescaper = strings.NewReplacer(
@@ -93,14 +98,93 @@ func intArrayToString(ns []uint32) string {
 		return ""
 	}
 
-	indexSeparator := IndexSeparator[0]
-
-	// Appr. 3 chars per num plus the comma.
-	b := []byte{}
+	b := []rune{}
 	for _, n := range ns {
-		b = strconv.AppendInt(b, int64(n), 10)
-		b = append(b, indexSeparator)
+		b = append(b, intToRune(n))
 	}
-	b = b[:len(b)-1]
 	return string(b)
 }
+
+// These constants define the number of bits representable
+// in 1,2,3,4 byte utf8 sequences, respectively.
+const ONE_BYTE_BITS = 7
+const TWO_BYTE_BITS = 11
+const THREE_BYTE_BITS = 16
+const FOUR_BYTE_BITS = 21
+
+// Helper for getting a sequence of bits from an integer.
+func getBits(i uint32, cnt byte, from byte) byte {
+	return byte((i >> from) & ((1 << cnt) - 1))
+}
+
+// Converts an integer in the range 0~1112060 into a rune.
+// Based on the ranges table in https://en.wikipedia.org/wiki/UTF-8
+func intToRune(i uint32) rune {
+	if i < (1 << ONE_BYTE_BITS) {
+		return rune(i)
+	}
+
+	if i < (1 << TWO_BYTE_BITS) {
+		r, size := utf8.DecodeRune([]byte{0b11000000 | getBits(i, 5, 6), 0b10000000 | getBits(i, 6, 0)})
+		if size != 2 || r == utf8.RuneError {
+			panic(fmt.Sprintf("Error encoding an int %d with size 2, got rune %v and size %d", size, r, i))
+		}
+		return r
+	}
+
+	// Last -3 here needed because for some reason 3rd to last codepoint 65533 in this range
+	// was returning utf8.RuneError during encoding.
+	if i < ((1 << THREE_BYTE_BITS) - UNICODE_INVALID_RANGE_DELTA - 3) {
+		if i >= UNICODE_INVALID_RANGE_START {
+			i += UNICODE_INVALID_RANGE_DELTA
+		}
+
+		r, size := utf8.DecodeRune([]byte{0b11100000 | getBits(i, 4, 12), 0b10000000 | getBits(i, 6, 6), 0b10000000 | getBits(i, 6, 0)})
+		if size != 3 || r == utf8.RuneError {
+			panic(fmt.Sprintf("Error encoding an int %d with size 3, got rune %v and size %d", size, r, i))
+		}
+		return r
+	}
+
+	if i < (1<<FOUR_BYTE_BITS - UNICODE_INVALID_RANGE_DELTA - 3) {
+		i += UNICODE_INVALID_RANGE_DELTA + 3
+		r, size := utf8.DecodeRune([]byte{0b11110000 | getBits(i, 3, 18), 0b10000000 | getBits(i, 6, 12), 0b10000000 | getBits(i, 6, 6), 0b10000000 | getBits(i, 6, 0)})
+		if size != 4 || r == utf8.RuneError {
+			panic(fmt.Sprintf("Error encoding an int %d with size 4, got rune %v and size %d", size, r, i))
+		}
+		return r
+	}
+	panic(fmt.Sprintf("The integer %d is too large for runeToInt()", i))
+}
+
+// Converts a rune generated by intToRune back to an integer
+func runeToInt(r rune) uint32 {
+	i := uint32(r)
+	if i < (1 << ONE_BYTE_BITS) {
+		return i
+	}
+
+	bytes := []byte{0, 0, 0, 0}
+
+	size := utf8.EncodeRune(bytes, r)
+
+	if size == 2 {
+		return uint32(bytes[0]&0b11111)<<6 | uint32(bytes[1]&0b111111)
+	}
+
+	if size == 3 {
+		result := uint32(bytes[0]&0b1111)<<12 | uint32(bytes[1]&0b111111)<<6 | uint32(bytes[2]&0b111111)
+		if result >= UNICODE_INVALID_RANGE_END {
+			return result - UNICODE_INVALID_RANGE_DELTA
+		}
+
+		return result
+	}
+
+	if size == 4 {
+		result := uint32(bytes[0]&0b111)<<18 | uint32(bytes[1]&0b111111)<<12 | uint32(bytes[2]&0b111111)<<6 | uint32(bytes[3]&0b111111)
+		return result - UNICODE_INVALID_RANGE_DELTA - 3
+	}
+
+	panic(fmt.Sprintf("Unexpected state decoding rune=%v size=%d", r, size))
+}
diff --git a/diffmatchpatch/stringutil_test.go b/diffmatchpatch/stringutil_test.go
index ab2bc10..73ab6ca 100644
--- a/diffmatchpatch/stringutil_test.go
+++ b/diffmatchpatch/stringutil_test.go
@@ -114,3 +114,21 @@ func TestLastIndexOf(t *testing.T) {
 		assert.Equal(t, tc.Expected, actual, fmt.Sprintf("Test case #%d, %#v", i, tc))
 	}
 }
+
+// Exhaustive check for all ints from 0 to 1112060 for correctness of implementation
+// of `intToRune() -> runeToInt()`.
+// This test is slow and runs longer than 5 seconds but it does provide a safety
+// guarantee that these 2 functions are correct for the ranges we support.
+func TestRuneToInt(t *testing.T) {
+
+	for i := uint32(0); i <= UNICODE_RANGE_MAX-UNICODE_INVALID_RANGE_DELTA-3; i += 1 {
+		r := intToRune(i)
+		ic := runeToInt(r)
+
+		assert.Equal(t, i, ic, fmt.Sprintf("intToRune(%d)=%d and runeToInt(%d)=%d", i, r, r, ic))
+	}
+
+	assert.Panics(t, func() {
+		intToRune(UNICODE_RANGE_MAX - UNICODE_INVALID_RANGE_DELTA - 2)
+	})
+}
diff --git a/go.mod b/go.mod
index c731033..c7886ce 100644
--- a/go.mod
+++ b/go.mod
@@ -8,4 +8,4 @@ require (
 	gopkg.in/yaml.v2 v2.4.0 // indirect
 )
 
-go 1.12
+go 1.13

From 4ebce9caa45902d8ca5f63a242d53e1e5c6072d3 Mon Sep 17 00:00:00 2001
From: michaelcheah <20640150+michaelcheah@users.noreply.github.com>
Date: Fri, 24 Mar 2023 19:37:54 +0000
Subject: [PATCH 8/8] fix: colored display for diffs with newlines

---
 diffmatchpatch/diff.go      | 27 +++++++++++++++++++++------
 diffmatchpatch/diff_test.go | 11 +++++++++++
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/diffmatchpatch/diff.go b/diffmatchpatch/diff.go
index 4f7b424..a312c1a 100644
--- a/diffmatchpatch/diff.go
+++ b/diffmatchpatch/diff.go
@@ -1151,13 +1151,28 @@ func (dmp *DiffMatchPatch) DiffPrettyText(diffs []Diff) string {
 
 		switch diff.Type {
 		case DiffInsert:
-			_, _ = buff.WriteString("\x1b[32m")
-			_, _ = buff.WriteString(text)
-			_, _ = buff.WriteString("\x1b[0m")
+			lines := strings.Split(text, "\n")
+			for i, line := range lines {
+				_, _ = buff.WriteString("\x1b[32m")
+				_, _ = buff.WriteString(line)
+				if i < len(lines)-1 {
+					_, _ = buff.WriteString("\x1b[0m\n")
+				} else {
+					_, _ = buff.WriteString("\x1b[0m")
+				}
+			}
+
 		case DiffDelete:
-			_, _ = buff.WriteString("\x1b[31m")
-			_, _ = buff.WriteString(text)
-			_, _ = buff.WriteString("\x1b[0m")
+			lines := strings.Split(text, "\n")
+			for i, line := range lines {
+				_, _ = buff.WriteString("\x1b[31m")
+				_, _ = buff.WriteString(line)
+				if i < len(lines)-1 {
+					_, _ = buff.WriteString("\x1b[0m\n")
+				} else {
+					_, _ = buff.WriteString("\x1b[0m")
+				}
+			}
 		case DiffEqual:
 			_, _ = buff.WriteString(text)
 		}
diff --git a/diffmatchpatch/diff_test.go b/diffmatchpatch/diff_test.go
index d6fed50..04329d6 100644
--- a/diffmatchpatch/diff_test.go
+++ b/diffmatchpatch/diff_test.go
@@ -1033,6 +1033,17 @@ func TestDiffPrettyText(t *testing.T) {
 
 			Expected: "a\n\x1b[31m<B>b</B>\x1b[0m\x1b[32mc&d\x1b[0m",
 		},
+		{
+			Diffs: []Diff{
+				{Type: DiffEqual, Text: "a\n"},
+				{Type: DiffDelete, Text: "b\nc\n"},
+				{Type: DiffEqual, Text: "def"},
+				{Type: DiffInsert, Text: "\ng\nh"},
+				{Type: DiffEqual, Text: "\ni"},
+			},
+
+			Expected: "a\n\x1b[31mb\x1b[0m\n\x1b[31mc\x1b[0m\n\x1b[31m\x1b[0mdef\x1b[32m\x1b[0m\n\x1b[32mg\x1b[0m\n\x1b[32mh\x1b[0m\ni",
+		},
 	} {
 		actual := dmp.DiffPrettyText(tc.Diffs)
 		assert.Equal(t, tc.Expected, actual, fmt.Sprintf("Test case #%d, %#v", i, tc))