@@ -88,6 +88,35 @@ diff_match_patch.Diff.prototype.toString = function() {
8888 return this [ 0 ] + ',' + this [ 1 ] ;
8989} ;
9090
/**
 * Test whether the first UTF-16 code unit of a string is a high (leading)
 * surrogate, i.e. lies in the range 0xD800-0xDBFF.
 * @param {string|undefined|null} c Value whose first code unit is examined.
 *     Only the code unit at index 0 is inspected.
 * @return {boolean} True if the first code unit is a high surrogate; false
 *     otherwise, including for an empty string, undefined or null.
 */
diff_match_patch.prototype.isHighSurrogate = function(c) {
  // Indexing a string past its end yields undefined; report "not a surrogate"
  // for that case instead of throwing on the charCodeAt call.
  if (c === undefined || c === null) {
    return false;
  }
  // charCodeAt(0) of an empty string is NaN, which fails both comparisons.
  var v = c.charCodeAt(0);
  return v >= 0xD800 && v <= 0xDBFF;
};
95+
/**
 * Test whether the first UTF-16 code unit of a string is a low (trailing)
 * surrogate, i.e. lies in the range 0xDC00-0xDFFF.
 * @param {string|undefined|null} c Value whose first code unit is examined.
 *     Only the code unit at index 0 is inspected.
 * @return {boolean} True if the first code unit is a low surrogate; false
 *     otherwise, including for an empty string, undefined or null.
 */
diff_match_patch.prototype.isLowSurrogate = function(c) {
  // Indexing a string past its end yields undefined; report "not a surrogate"
  // for that case instead of throwing on the charCodeAt call.
  if (c === undefined || c === null) {
    return false;
  }
  // charCodeAt(0) of an empty string is NaN, which fails both comparisons.
  var v = c.charCodeAt(0);
  return v >= 0xDC00 && v <= 0xDFFF;
};
100+
/**
 * Split a string into an array of pieces, keeping each well-formed surrogate
 * pair together as a single two-code-unit element.
 * Every other code unit, including an unpaired high or low surrogate, is
 * emitted unchanged as its own one-code-unit element.
 * @param {string} str Text to split.
 * @return {!Array.<string>} The pieces, in order; joining them with ''
 *     reproduces str.
 */
diff_match_patch.prototype.scalarValues = function(str) {
  var length = str.length;
  var scalars = [];

  for (var i = 0; i < length; i++) {
    var scalar = str[i];

    // Only merge when a high surrogate is immediately followed by a low one.
    // The i < length - 1 bound guarantees str[i + 1] exists before it is read.
    if (i < length - 1 &&
        this.isHighSurrogate(scalar) &&
        this.isLowSurrogate(str[i + 1])) {
      scalar += str[i + 1];
      // Consume the low half so it is not emitted a second time.
      i++;
    }

    scalars.push(scalar);
  }

  return scalars;
};
91120
92121/**
93122 * Find the differences between two texts. Simplifies the problem by stripping
@@ -134,12 +163,18 @@ diff_match_patch.prototype.diff_main = function(text1, text2, opt_checklines,
134163
135164 // Trim off common prefix (speedup).
136165 var commonlength = this . diff_commonPrefix ( text1 , text2 ) ;
166+ if ( commonlength > 0 && this . isHighSurrogate ( text1 [ commonlength - 1 ] ) ) {
167+ commonlength -- ;
168+ }
137169 var commonprefix = text1 . substring ( 0 , commonlength ) ;
138170 text1 = text1 . substring ( commonlength ) ;
139171 text2 = text2 . substring ( commonlength ) ;
140172
141173 // Trim off common suffix (speedup).
142174 commonlength = this . diff_commonSuffix ( text1 , text2 ) ;
175+ if ( commonlength > 0 && this . isLowSurrogate ( text1 [ text1 . length - commonlength ] ) ) {
176+ commonlength -- ;
177+ }
143178 var commonsuffix = text1 . substring ( text1 . length - commonlength ) ;
144179 text1 = text1 . substring ( 0 , text1 . length - commonlength ) ;
145180 text2 = text2 . substring ( 0 , text2 . length - commonlength ) ;
@@ -187,13 +222,23 @@ diff_match_patch.prototype.diff_compute_ = function(text1, text2, checklines,
187222
188223 var longtext = text1 . length > text2 . length ? text1 : text2 ;
189224 var shorttext = text1 . length > text2 . length ? text2 : text1 ;
225+ var shortlength = shorttext . length ;
190226 var i = longtext . indexOf ( shorttext ) ;
191227 if ( i != - 1 ) {
228+ // skip leading unpaired surrogate
229+ if ( this . isLowSurrogate ( longtext [ i ] ) ) {
230+ shortlength -- ;
231+ i ++ ;
232+ }
233+ // skip trailing unpaired surrogate
234+ if ( this . isHighSurrogate ( longtext [ i + shortlength ] ) ) {
235+ shortlength -- ;
236+ }
192237 // Shorter text is inside the longer text (speedup).
193238 diffs = [ new diff_match_patch . Diff ( DIFF_INSERT , longtext . substring ( 0 , i ) ) ,
194239 new diff_match_patch . Diff ( DIFF_EQUAL , shorttext ) ,
195240 new diff_match_patch . Diff ( DIFF_INSERT ,
196- longtext . substring ( i + shorttext . length ) ) ] ;
241+ longtext . substring ( i + shortlength ) ) ] ;
197242 // Swap insertions for deletions if diff is reversed.
198243 if ( text1 . length > text2 . length ) {
199244 diffs [ 0 ] [ 0 ] = diffs [ 2 ] [ 0 ] = DIFF_DELETE ;
@@ -439,6 +484,15 @@ diff_match_patch.prototype.diff_bisect_ = function(text1, text2, deadline) {
439484 */
440485diff_match_patch . prototype . diff_bisectSplit_ = function ( text1 , text2 , x , y ,
441486 deadline ) {
487+ // backup if we split a surrogate
488+ if (
489+ x > 0 && x < text1 . length && this . isLowSurrogate ( text1 [ x ] ) &&
490+ y > 0 && y < text2 . length && this . isLowSurrogate ( text2 [ y ] )
491+ ) {
492+ x -- ;
493+ y -- ;
494+ }
495+
442496 var text1a = text1 . substring ( 0 , x ) ;
443497 var text2a = text2 . substring ( 0 , y ) ;
444498 var text1b = text1 . substring ( x ) ;
@@ -569,6 +623,12 @@ diff_match_patch.prototype.diff_commonPrefix = function(text1, text2) {
569623 }
570624 pointermid = Math . floor ( ( pointermax - pointermin ) / 2 + pointermin ) ;
571625 }
626+
627+ // shorten the prefix if it splits a surrogate
628+ if ( pointermid > 0 && this . isHighSurrogate ( text1 [ pointermid - 1 ] ) ) {
629+ pointermid -- ;
630+ }
631+
572632 return pointermid ;
573633} ;
574634
@@ -601,6 +661,12 @@ diff_match_patch.prototype.diff_commonSuffix = function(text1, text2) {
601661 }
602662 pointermid = Math . floor ( ( pointermax - pointermin ) / 2 + pointermin ) ;
603663 }
664+
665+ // shorten the suffix if it splits a surrogate
666+ if ( pointermid < length - 1 && this . isLowSurrogate ( text1 [ pointermid ] ) ) {
667+ pointermid ++ ;
668+ }
669+
604670 return pointermid ;
605671} ;
606672
@@ -749,6 +815,24 @@ diff_match_patch.prototype.diff_halfMatch_ = function(text1, text2) {
749815 text1_b = hm [ 3 ] ;
750816 }
751817 var mid_common = hm [ 4 ] ;
818+
819+ // move forward to prevent splitting a surrogate pair
820+ if ( mid_common . length > 0 && this . isLowSurrogate ( mid_common [ 0 ] ) ) {
821+ text1_a = text1_a + mid_common [ 0 ] ;
822+ text2_a = text2_a + mid_common [ 0 ] ;
823+ mid_common = mid_common . substring ( 1 ) ;
824+ }
825+
826+ // back up to prevent splitting a surrogate pair
827+ if (
828+ text1_b . length > 0 && this . isLowSurrogate ( text1_b [ 0 ] ) &&
829+ text2_b . length > 0 && this . isLowSurrogate ( text2_b [ 0 ] )
830+ ) {
831+ text1_b = mid_common [ mid_common . length - 1 ] + text1_b ;
832+ text2_b = mid_common [ mid_common . length - 1 ] + text2_b ;
833+ mid_common = mid_common . substring ( 0 , - 1 ) ;
834+ }
835+
752836 return [ text1_a , text1_b , text2_a , text2_b , mid_common ] ;
753837} ;
754838
0 commit comments