@@ -79,6 +79,9 @@ func splice(slice []Diff, index int, amount int, elements ...Diff) []Diff {
7979 return slice
8080}
8181
82+ // DiffFunction is a diff strategy: it takes two rune slices and returns the list of Diff operations between them.
83+ type DiffFunction func (text1 , text2 []rune ) []Diff
84+
8285// DiffMain finds the differences between two texts.
8386// If an invalid UTF-8 sequence is encountered, it will be replaced by the Unicode replacement character.
8487func (dmp * DiffMatchPatch ) DiffMain (text1 , text2 string , checklines bool ) []Diff {
@@ -88,14 +91,20 @@ func (dmp *DiffMatchPatch) DiffMain(text1, text2 string, checklines bool) []Diff
8891// DiffMainRunes finds the differences between two rune sequences.
8992// If an invalid UTF-8 sequence is encountered, it will be replaced by the Unicode replacement character.
9093func (dmp * DiffMatchPatch ) DiffMainRunes (text1 , text2 []rune , checklines bool ) []Diff {
91- var deadline time.Time
92- if dmp .DiffTimeout > 0 {
93- deadline = time .Now ().Add (dmp .DiffTimeout )
94+ deadline := dmp .getDeadline ()
95+
96+ // Encapsulate the deadline and line mode logic in the closure
97+ diffFn := func (text1 , text2 []rune ) []Diff {
98+ if checklines && len (text1 ) > 100 && len (text2 ) > 100 {
99+ return dmp .diffBigLine (text1 , text2 , deadline )
100+ }
101+ return dmp .diffBisect (text1 , text2 , deadline )
94102 }
95- return dmp .diffMainRunes (text1 , text2 , checklines , deadline )
103+
104+ return dmp .diffMainRunes (text1 , text2 , diffFn )
96105}
97106
98- func (dmp * DiffMatchPatch ) diffMainRunes (text1 , text2 []rune , checklines bool , deadline time. Time ) []Diff {
107+ func (dmp * DiffMatchPatch ) diffMainRunes (text1 , text2 []rune , diffFn DiffFunction ) []Diff {
99108 if runesEqual (text1 , text2 ) {
100109 var diffs []Diff
101110 if len (text1 ) > 0 {
@@ -116,7 +125,7 @@ func (dmp *DiffMatchPatch) diffMainRunes(text1, text2 []rune, checklines bool, d
116125 text2 = text2 [:len (text2 )- commonlength ]
117126
118127 // Compute the diff on the middle block.
119- diffs := dmp .diffCompute (text1 , text2 , checklines , deadline )
128+ diffs := dmp .diffCompute (text1 , text2 , diffFn )
120129
121130 // Restore the prefix and suffix.
122131 if len (commonprefix ) != 0 {
@@ -129,8 +138,16 @@ func (dmp *DiffMatchPatch) diffMainRunes(text1, text2 []rune, checklines bool, d
129138 return dmp .DiffCleanupMerge (diffs )
130139}
131140
141+ // getDeadline returns the deadline for the diff operation
142+ func (dmp * DiffMatchPatch ) getDeadline () time.Time {
143+ if dmp .DiffTimeout > 0 {
144+ return time .Now ().Add (dmp .DiffTimeout )
145+ }
146+ return time.Time {}
147+ }
148+
132149// diffCompute finds the differences between two rune slices. Assumes that the texts do not have any common prefix or suffix.
133- func (dmp * DiffMatchPatch ) diffCompute (text1 , text2 []rune , checklines bool , deadline time. Time ) []Diff {
150+ func (dmp * DiffMatchPatch ) diffCompute (text1 , text2 []rune , diffFn DiffFunction ) []Diff {
134151 diffs := []Diff {}
135152 if len (text1 ) == 0 {
136153 // Just add some text (speedup).
@@ -177,25 +194,30 @@ func (dmp *DiffMatchPatch) diffCompute(text1, text2 []rune, checklines bool, dea
177194 text2B := hm [3 ]
178195 midCommon := hm [4 ]
179196 // Send both pairs off for separate processing.
180- diffsA := dmp .diffMainRunes (text1A , text2A , checklines , deadline )
181- diffsB := dmp .diffMainRunes (text1B , text2B , checklines , deadline )
197+ diffsA := dmp .diffMainRunes (text1A , text2A , diffFn )
198+ diffsB := dmp .diffMainRunes (text1B , text2B , diffFn )
182199 // Merge the results.
183200 diffs := diffsA
184201 diffs = append (diffs , Diff {DiffEqual , string (midCommon )})
185202 diffs = append (diffs , diffsB ... )
186203 return diffs
187- } else if checklines && len (text1 ) > 100 && len (text2 ) > 100 {
188- return dmp .diffLineMode (text1 , text2 , deadline )
189204 }
190- return dmp .diffBisect (text1 , text2 , deadline )
205+
206+ return diffFn (text1 , text2 )
191207}
192208
193- // diffLineMode does a quick line-level diff on both []runes, then rediff the parts for greater accuracy. This speedup can produce non-minimal diffs.
194- func (dmp * DiffMatchPatch ) diffLineMode (text1 , text2 []rune , deadline time.Time ) []Diff {
209+ // diffBigLine does a quick line-level diff on both []runes, then rediff the parts for greater accuracy. This speedup can produce non-minimal diffs.
210+ func (dmp * DiffMatchPatch ) diffBigLine (text1 , text2 []rune , deadline time.Time ) []Diff {
195211 // Scan the text on a line-by-line basis first.
196212 text1 , text2 , linearray := dmp .DiffLinesToRunes (string (text1 ), string (text2 ))
197213
198- diffs := dmp .diffMainRunes (text1 , text2 , false , deadline )
214+ // For line-level diffing, we want to do a simple comparison of the line-based runes
215+ // rather than character-by-character diffing
216+ diffFn := func (text1 , text2 []rune ) []Diff {
217+ return dmp .diffBisect (text1 , text2 , deadline )
218+ }
219+
220+ diffs := dmp .diffMainRunes (text1 , text2 , diffFn )
199221
200222 // Convert the diff back to original text.
201223 diffs = dmp .DiffCharsToLines (diffs , linearray )
@@ -230,7 +252,7 @@ func (dmp *DiffMatchPatch) diffLineMode(text1, text2 []rune, deadline time.Time)
230252 countDelete + countInsert )
231253
232254 pointer = pointer - countDelete - countInsert
233- a := dmp .diffMainRunes ([]rune (textDelete ), []rune (textInsert ), false , deadline )
255+ a := dmp .diffMainRunes ([]rune (textDelete ), []rune (textInsert ), diffFn )
234256 for j := len (a ) - 1 ; j >= 0 ; j -- {
235257 diffs = splice (diffs , pointer , 0 , a [j ])
236258 }
@@ -248,6 +270,37 @@ func (dmp *DiffMatchPatch) diffLineMode(text1, text2 []rune, deadline time.Time)
248270 return diffs [:len (diffs )- 1 ] // Remove the dummy entry at the end.
249271}
250272
273+ // DiffLineMode finds the differences between two texts, always using line mode.
274+ // Unlike DiffMain with checklines=true, this method will always use line mode regardless of text length.
275+ // If an invalid UTF-8 sequence is encountered, it will be replaced by the Unicode replacement character.
276+ func (dmp * DiffMatchPatch ) DiffLineMode (text1 , text2 string ) []Diff {
277+ return dmp .diffOnlyByLines ([]rune (text1 ), []rune (text2 ))
278+ }
279+
280+ // diffOnlyByLines finds the differences between two texts, only by lines.
281+ func (dmp * DiffMatchPatch ) diffOnlyByLines (text1 , text2 []rune ) []Diff {
282+ // For line-level diffing, we want to do a simple comparison of the line-based runes
283+ // rather than character-by-character diffing
284+ diffFn := func (text1 , text2 []rune ) []Diff {
285+ if ! runesEqual (text1 , text2 ) {
286+ return []Diff {
287+ {DiffDelete , string (text1 )},
288+ {DiffInsert , string (text2 )},
289+ }
290+ }
291+ return []Diff {{DiffEqual , string (text1 )}}
292+ }
293+
294+ // For line-based diffing, we want to avoid the character-based optimizations in diffCompute
295+ // and just use our simple diff function directly
296+ diffs := diffFn (text1 , text2 )
297+
298+ // Optimize line-based diffs using line-specific cleanup
299+ diffs = dmp .DiffCleanupLineBased (diffs )
300+
301+ return diffs
302+ }
303+
251304// DiffBisect finds the 'middle snake' of a diff, split the problem in two and return the recursively constructed diff.
252305// If an invalid UTF-8 sequence is encountered, it will be replaced by the Unicode replacement character.
253306// See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
@@ -380,9 +433,14 @@ func (dmp *DiffMatchPatch) diffBisectSplit(runes1, runes2 []rune, x, y int,
380433 runes1b := runes1 [x :]
381434 runes2b := runes2 [y :]
382435
436+ // wrap dmp.diffBisect with deadline
437+ diffFn := func (text1 , text2 []rune ) []Diff {
438+ return dmp .diffBisect (text1 , text2 , deadline )
439+ }
440+
383441 // Compute both diffs serially.
384- diffs := dmp .diffMainRunes (runes1a , runes2a , false , deadline )
385- diffsb := dmp .diffMainRunes (runes1b , runes2b , false , deadline )
442+ diffs := dmp .diffMainRunes (runes1a , runes2a , diffFn )
443+ diffsb := dmp .diffMainRunes (runes1b , runes2b , diffFn )
386444
387445 return append (diffs , diffsb ... )
388446}
@@ -953,6 +1011,77 @@ func (dmp *DiffMatchPatch) DiffCleanupEfficiency(diffs []Diff) []Diff {
9531011 return diffs
9541012}
9551013
1014+ // DiffCleanupLineBased optimizes line-based diffs by merging consecutive operations,
1015+ // removing empty line diffs, and grouping related line changes together.
1016+ // This function is specifically designed for line-level diffing where each diff
1017+ // represents entire lines rather than character-level changes.
1018+ func (dmp * DiffMatchPatch ) DiffCleanupLineBased (diffs []Diff ) []Diff {
1019+ if len (diffs ) == 0 {
1020+ return diffs
1021+ }
1022+
1023+ // First pass: merge consecutive operations of the same type
1024+ cleaned := make ([]Diff , 0 , len (diffs ))
1025+ pointer := 0
1026+
1027+ for pointer < len (diffs ) {
1028+ current := diffs [pointer ]
1029+
1030+ // If this is an equality, just add it
1031+ if current .Type == DiffEqual {
1032+ cleaned = append (cleaned , current )
1033+ pointer ++
1034+ continue
1035+ }
1036+
1037+ // Collect consecutive operations of the same type
1038+ mergedText := current .Text
1039+ pointer ++
1040+
1041+ // Merge consecutive deletions or insertions
1042+ for pointer < len (diffs ) && diffs [pointer ].Type == current .Type {
1043+ mergedText += diffs [pointer ].Text
1044+ pointer ++
1045+ }
1046+
1047+ // Only add non-empty merged operations
1048+ if len (strings .TrimSpace (mergedText )) > 0 {
1049+ cleaned = append (cleaned , Diff {current .Type , mergedText })
1050+ }
1051+ }
1052+
1053+ // Second pass: remove trivial equalities (empty lines or whitespace-only lines)
1054+ // and merge adjacent equalities
1055+ if len (cleaned ) > 1 {
1056+ final := make ([]Diff , 0 , len (cleaned ))
1057+
1058+ for i := 0 ; i < len (cleaned ); i ++ {
1059+ current := cleaned [i ]
1060+
1061+ // Skip empty or whitespace-only equalities
1062+ if current .Type == DiffEqual && len (strings .TrimSpace (current .Text )) == 0 {
1063+ continue
1064+ }
1065+
1066+ // Merge consecutive equalities
1067+ if current .Type == DiffEqual && len (final ) > 0 && final [len (final )- 1 ].Type == DiffEqual {
1068+ final [len (final )- 1 ].Text += current .Text
1069+ } else {
1070+ final = append (final , current )
1071+ }
1072+ }
1073+
1074+ cleaned = final
1075+ }
1076+
1077+ // Third pass: optimize deletion-insertion pairs
1078+ // If we have a deletion followed by an insertion, and they're similar,
1079+ // we might want to keep them as separate operations for clarity in line-based diffs
1080+ // This preserves the line-by-line nature of the diff
1081+
1082+ return cleaned
1083+ }
1084+
9561085// DiffCleanupMerge reorders and merges like edit sections. Merge equalities.
9571086// Any edit section can move as long as it doesn't cross an equality.
9581087func (dmp * DiffMatchPatch ) DiffCleanupMerge (diffs []Diff ) []Diff {
0 commit comments