cleanupSemantic.js
1 'use strict'; 2 3 Object.defineProperty(exports, '__esModule', { 4 value: true 5 }); 6 exports.cleanupSemantic = exports.DIFF_INSERT = exports.DIFF_DELETE = exports.DIFF_EQUAL = exports.Diff = void 0; 7 8 function _defineProperty(obj, key, value) { 9 if (key in obj) { 10 Object.defineProperty(obj, key, { 11 value: value, 12 enumerable: true, 13 configurable: true, 14 writable: true 15 }); 16 } else { 17 obj[key] = value; 18 } 19 return obj; 20 } 21 22 /** 23 * Diff Match and Patch 24 * Copyright 2018 The diff-match-patch Authors. 25 * https://github.com/google/diff-match-patch 26 * 27 * Licensed under the Apache License, Version 2.0 (the "License"); 28 * you may not use this file except in compliance with the License. 29 * You may obtain a copy of the License at 30 * 31 * http://www.apache.org/licenses/LICENSE-2.0 32 * 33 * Unless required by applicable law or agreed to in writing, software 34 * distributed under the License is distributed on an "AS IS" BASIS, 35 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 36 * See the License for the specific language governing permissions and 37 * limitations under the License. 38 */ 39 40 /** 41 * @fileoverview Computes the difference between two texts to create a patch. 42 * Applies the patch onto another text, allowing for errors. 43 * @author fraser@google.com (Neil Fraser) 44 */ 45 46 /** 47 * CHANGES by pedrottimark to diff_match_patch_uncompressed.ts file: 48 * 49 * 1. Delete anything not needed to use diff_cleanupSemantic method 50 * 2. Convert from prototype properties to var declarations 51 * 3. Convert Diff to class from constructor and prototype 52 * 4. Add type annotations for arguments and return values 53 * 5. Add exports 54 */ 55 56 /** 57 * The data structure representing a diff is an array of tuples: 58 * [[DIFF_DELETE, 'Hello'], [DIFF_INSERT, 'Goodbye'], [DIFF_EQUAL, ' world.']] 59 * which means: delete 'Hello', add 'Goodbye' and keep ' world.' 60 */ 61 var DIFF_DELETE = -1; 62 exports.DIFF_DELETE = DIFF_DELETE; 63 var DIFF_INSERT = 1; 64 exports.DIFF_INSERT = DIFF_INSERT; 65 var DIFF_EQUAL = 0; 66 /** 67 * Class representing one diff tuple. 68 * Attempts to look like a two-element array (which is what this used to be). 69 * @param {number} op Operation, one of: DIFF_DELETE, DIFF_INSERT, DIFF_EQUAL. 70 * @param {string} text Text to be deleted, inserted, or retained. 71 * @constructor 72 */ 73 74 exports.DIFF_EQUAL = DIFF_EQUAL; 75 76 class Diff { 77 constructor(op, text) { 78 _defineProperty(this, 0, void 0); 79 80 _defineProperty(this, 1, void 0); 81 82 this[0] = op; 83 this[1] = text; 84 } 85 } 86 /** 87 * Determine the common prefix of two strings. 88 * @param {string} text1 First string. 89 * @param {string} text2 Second string. 90 * @return {number} The number of characters common to the start of each 91 * string. 92 */ 93 94 exports.Diff = Diff; 95 96 var diff_commonPrefix = function (text1, text2) { 97 // Quick check for common null cases. 98 if (!text1 || !text2 || text1.charAt(0) != text2.charAt(0)) { 99 return 0; 100 } // Binary search. 101 // Performance analysis: https://neil.fraser.name/news/2007/10/09/ 102 103 var pointermin = 0; 104 var pointermax = Math.min(text1.length, text2.length); 105 var pointermid = pointermax; 106 var pointerstart = 0; 107 108 while (pointermin < pointermid) { 109 if ( 110 text1.substring(pointerstart, pointermid) == 111 text2.substring(pointerstart, pointermid) 112 ) { 113 pointermin = pointermid; 114 pointerstart = pointermin; 115 } else { 116 pointermax = pointermid; 117 } 118 119 pointermid = Math.floor((pointermax - pointermin) / 2 + pointermin); 120 } 121 122 return pointermid; 123 }; 124 /** 125 * Determine the common suffix of two strings. 126 * @param {string} text1 First string. 127 * @param {string} text2 Second string. 128 * @return {number} The number of characters common to the end of each string. 129 */ 130 131 var diff_commonSuffix = function (text1, text2) { 132 // Quick check for common null cases. 133 if ( 134 !text1 || 135 !text2 || 136 text1.charAt(text1.length - 1) != text2.charAt(text2.length - 1) 137 ) { 138 return 0; 139 } // Binary search. 140 // Performance analysis: https://neil.fraser.name/news/2007/10/09/ 141 142 var pointermin = 0; 143 var pointermax = Math.min(text1.length, text2.length); 144 var pointermid = pointermax; 145 var pointerend = 0; 146 147 while (pointermin < pointermid) { 148 if ( 149 text1.substring(text1.length - pointermid, text1.length - pointerend) == 150 text2.substring(text2.length - pointermid, text2.length - pointerend) 151 ) { 152 pointermin = pointermid; 153 pointerend = pointermin; 154 } else { 155 pointermax = pointermid; 156 } 157 158 pointermid = Math.floor((pointermax - pointermin) / 2 + pointermin); 159 } 160 161 return pointermid; 162 }; 163 /** 164 * Determine if the suffix of one string is the prefix of another. 165 * @param {string} text1 First string. 166 * @param {string} text2 Second string. 167 * @return {number} The number of characters common to the end of the first 168 * string and the start of the second string. 169 * @private 170 */ 171 172 var diff_commonOverlap_ = function (text1, text2) { 173 // Cache the text lengths to prevent multiple calls. 174 var text1_length = text1.length; 175 var text2_length = text2.length; // Eliminate the null case. 176 177 if (text1_length == 0 || text2_length == 0) { 178 return 0; 179 } // Truncate the longer string. 180 181 if (text1_length > text2_length) { 182 text1 = text1.substring(text1_length - text2_length); 183 } else if (text1_length < text2_length) { 184 text2 = text2.substring(0, text1_length); 185 } 186 187 var text_length = Math.min(text1_length, text2_length); // Quick check for the worst case. 188 189 if (text1 == text2) { 190 return text_length; 191 } // Start by looking for a single character match 192 // and increase length until no match is found. 193 // Performance analysis: https://neil.fraser.name/news/2010/11/04/ 194 195 var best = 0; 196 var length = 1; 197 198 while (true) { 199 var pattern = text1.substring(text_length - length); 200 var found = text2.indexOf(pattern); 201 202 if (found == -1) { 203 return best; 204 } 205 206 length += found; 207 208 if ( 209 found == 0 || 210 text1.substring(text_length - length) == text2.substring(0, length) 211 ) { 212 best = length; 213 length++; 214 } 215 } 216 }; 217 /** 218 * Reduce the number of edits by eliminating semantically trivial equalities. 219 * @param {!Array.<!diff_match_patch.Diff>} diffs Array of diff tuples. 220 */ 221 222 var diff_cleanupSemantic = function (diffs) { 223 var changes = false; 224 var equalities = []; // Stack of indices where equalities are found. 225 226 var equalitiesLength = 0; // Keeping our own length var is faster in JS. 227 228 /** @type {?string} */ 229 230 var lastEquality = null; // Always equal to diffs[equalities[equalitiesLength - 1]][1] 231 232 var pointer = 0; // Index of current position. 233 // Number of characters that changed prior to the equality. 234 235 var length_insertions1 = 0; 236 var length_deletions1 = 0; // Number of characters that changed after the equality. 237 238 var length_insertions2 = 0; 239 var length_deletions2 = 0; 240 241 while (pointer < diffs.length) { 242 if (diffs[pointer][0] == DIFF_EQUAL) { 243 // Equality found. 244 equalities[equalitiesLength++] = pointer; 245 length_insertions1 = length_insertions2; 246 length_deletions1 = length_deletions2; 247 length_insertions2 = 0; 248 length_deletions2 = 0; 249 lastEquality = diffs[pointer][1]; 250 } else { 251 // An insertion or deletion. 252 if (diffs[pointer][0] == DIFF_INSERT) { 253 length_insertions2 += diffs[pointer][1].length; 254 } else { 255 length_deletions2 += diffs[pointer][1].length; 256 } // Eliminate an equality that is smaller or equal to the edits on both 257 // sides of it. 258 259 if ( 260 lastEquality && 261 lastEquality.length <= 262 Math.max(length_insertions1, length_deletions1) && 263 lastEquality.length <= Math.max(length_insertions2, length_deletions2) 264 ) { 265 // Duplicate record. 266 diffs.splice( 267 equalities[equalitiesLength - 1], 268 0, 269 new Diff(DIFF_DELETE, lastEquality) 270 ); // Change second copy to insert. 271 272 diffs[equalities[equalitiesLength - 1] + 1][0] = DIFF_INSERT; // Throw away the equality we just deleted. 273 274 equalitiesLength--; // Throw away the previous equality (it needs to be reevaluated). 275 276 equalitiesLength--; 277 pointer = equalitiesLength > 0 ? equalities[equalitiesLength - 1] : -1; 278 length_insertions1 = 0; // Reset the counters. 279 280 length_deletions1 = 0; 281 length_insertions2 = 0; 282 length_deletions2 = 0; 283 lastEquality = null; 284 changes = true; 285 } 286 } 287 288 pointer++; 289 } // Normalize the diff. 290 291 if (changes) { 292 diff_cleanupMerge(diffs); 293 } 294 295 diff_cleanupSemanticLossless(diffs); // Find any overlaps between deletions and insertions. 296 // e.g: <del>abcxxx</del><ins>xxxdef</ins> 297 // -> <del>abc</del>xxx<ins>def</ins> 298 // e.g: <del>xxxabc</del><ins>defxxx</ins> 299 // -> <ins>def</ins>xxx<del>abc</del> 300 // Only extract an overlap if it is as big as the edit ahead or behind it. 301 302 pointer = 1; 303 304 while (pointer < diffs.length) { 305 if ( 306 diffs[pointer - 1][0] == DIFF_DELETE && 307 diffs[pointer][0] == DIFF_INSERT 308 ) { 309 var deletion = diffs[pointer - 1][1]; 310 var insertion = diffs[pointer][1]; 311 var overlap_length1 = diff_commonOverlap_(deletion, insertion); 312 var overlap_length2 = diff_commonOverlap_(insertion, deletion); 313 314 if (overlap_length1 >= overlap_length2) { 315 if ( 316 overlap_length1 >= deletion.length / 2 || 317 overlap_length1 >= insertion.length / 2 318 ) { 319 // Overlap found. Insert an equality and trim the surrounding edits. 320 diffs.splice( 321 pointer, 322 0, 323 new Diff(DIFF_EQUAL, insertion.substring(0, overlap_length1)) 324 ); 325 diffs[pointer - 1][1] = deletion.substring( 326 0, 327 deletion.length - overlap_length1 328 ); 329 diffs[pointer + 1][1] = insertion.substring(overlap_length1); 330 pointer++; 331 } 332 } else { 333 if ( 334 overlap_length2 >= deletion.length / 2 || 335 overlap_length2 >= insertion.length / 2 336 ) { 337 // Reverse overlap found. 338 // Insert an equality and swap and trim the surrounding edits. 339 diffs.splice( 340 pointer, 341 0, 342 new Diff(DIFF_EQUAL, deletion.substring(0, overlap_length2)) 343 ); 344 diffs[pointer - 1][0] = DIFF_INSERT; 345 diffs[pointer - 1][1] = insertion.substring( 346 0, 347 insertion.length - overlap_length2 348 ); 349 diffs[pointer + 1][0] = DIFF_DELETE; 350 diffs[pointer + 1][1] = deletion.substring(overlap_length2); 351 pointer++; 352 } 353 } 354 355 pointer++; 356 } 357 358 pointer++; 359 } 360 }; 361 /** 362 * Look for single edits surrounded on both sides by equalities 363 * which can be shifted sideways to align the edit to a word boundary. 364 * e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came. 365 * @param {!Array.<!diff_match_patch.Diff>} diffs Array of diff tuples. 366 */ 367 368 exports.cleanupSemantic = diff_cleanupSemantic; 369 370 var diff_cleanupSemanticLossless = function (diffs) { 371 /** 372 * Given two strings, compute a score representing whether the internal 373 * boundary falls on logical boundaries. 374 * Scores range from 6 (best) to 0 (worst). 375 * Closure, but does not reference any external variables. 376 * @param {string} one First string. 377 * @param {string} two Second string. 378 * @return {number} The score. 379 * @private 380 */ 381 function diff_cleanupSemanticScore_(one, two) { 382 if (!one || !two) { 383 // Edges are the best. 384 return 6; 385 } // Each port of this function behaves slightly differently due to 386 // subtle differences in each language's definition of things like 387 // 'whitespace'. Since this function's purpose is largely cosmetic, 388 // the choice has been made to use each language's native features 389 // rather than force total conformity. 390 391 var char1 = one.charAt(one.length - 1); 392 var char2 = two.charAt(0); 393 var nonAlphaNumeric1 = char1.match(nonAlphaNumericRegex_); 394 var nonAlphaNumeric2 = char2.match(nonAlphaNumericRegex_); 395 var whitespace1 = nonAlphaNumeric1 && char1.match(whitespaceRegex_); 396 var whitespace2 = nonAlphaNumeric2 && char2.match(whitespaceRegex_); 397 var lineBreak1 = whitespace1 && char1.match(linebreakRegex_); 398 var lineBreak2 = whitespace2 && char2.match(linebreakRegex_); 399 var blankLine1 = lineBreak1 && one.match(blanklineEndRegex_); 400 var blankLine2 = lineBreak2 && two.match(blanklineStartRegex_); 401 402 if (blankLine1 || blankLine2) { 403 // Five points for blank lines. 404 return 5; 405 } else if (lineBreak1 || lineBreak2) { 406 // Four points for line breaks. 407 return 4; 408 } else if (nonAlphaNumeric1 && !whitespace1 && whitespace2) { 409 // Three points for end of sentences. 410 return 3; 411 } else if (whitespace1 || whitespace2) { 412 // Two points for whitespace. 413 return 2; 414 } else if (nonAlphaNumeric1 || nonAlphaNumeric2) { 415 // One point for non-alphanumeric. 416 return 1; 417 } 418 419 return 0; 420 } 421 422 var pointer = 1; // Intentionally ignore the first and last element (don't need checking). 423 424 while (pointer < diffs.length - 1) { 425 if ( 426 diffs[pointer - 1][0] == DIFF_EQUAL && 427 diffs[pointer + 1][0] == DIFF_EQUAL 428 ) { 429 // This is a single edit surrounded by equalities. 430 var equality1 = diffs[pointer - 1][1]; 431 var edit = diffs[pointer][1]; 432 var equality2 = diffs[pointer + 1][1]; // First, shift the edit as far left as possible. 433 434 var commonOffset = diff_commonSuffix(equality1, edit); 435 436 if (commonOffset) { 437 var commonString = edit.substring(edit.length - commonOffset); 438 equality1 = equality1.substring(0, equality1.length - commonOffset); 439 edit = commonString + edit.substring(0, edit.length - commonOffset); 440 equality2 = commonString + equality2; 441 } // Second, step character by character right, looking for the best fit. 442 443 var bestEquality1 = equality1; 444 var bestEdit = edit; 445 var bestEquality2 = equality2; 446 var bestScore = 447 diff_cleanupSemanticScore_(equality1, edit) + 448 diff_cleanupSemanticScore_(edit, equality2); 449 450 while (edit.charAt(0) === equality2.charAt(0)) { 451 equality1 += edit.charAt(0); 452 edit = edit.substring(1) + equality2.charAt(0); 453 equality2 = equality2.substring(1); 454 var score = 455 diff_cleanupSemanticScore_(equality1, edit) + 456 diff_cleanupSemanticScore_(edit, equality2); // The >= encourages trailing rather than leading whitespace on edits. 457 458 if (score >= bestScore) { 459 bestScore = score; 460 bestEquality1 = equality1; 461 bestEdit = edit; 462 bestEquality2 = equality2; 463 } 464 } 465 466 if (diffs[pointer - 1][1] != bestEquality1) { 467 // We have an improvement, save it back to the diff. 468 if (bestEquality1) { 469 diffs[pointer - 1][1] = bestEquality1; 470 } else { 471 diffs.splice(pointer - 1, 1); 472 pointer--; 473 } 474 475 diffs[pointer][1] = bestEdit; 476 477 if (bestEquality2) { 478 diffs[pointer + 1][1] = bestEquality2; 479 } else { 480 diffs.splice(pointer + 1, 1); 481 pointer--; 482 } 483 } 484 } 485 486 pointer++; 487 } 488 }; // Define some regex patterns for matching boundaries. 489 490 var nonAlphaNumericRegex_ = /[^a-zA-Z0-9]/; 491 var whitespaceRegex_ = /\s/; 492 var linebreakRegex_ = /[\r\n]/; 493 var blanklineEndRegex_ = /\n\r?\n$/; 494 var blanklineStartRegex_ = /^\r?\n\r?\n/; 495 /** 496 * Reorder and merge like edit sections. Merge equalities. 497 * Any edit section can move as long as it doesn't cross an equality. 498 * @param {!Array.<!diff_match_patch.Diff>} diffs Array of diff tuples. 499 */ 500 501 var diff_cleanupMerge = function (diffs) { 502 // Add a dummy entry at the end. 503 diffs.push(new Diff(DIFF_EQUAL, '')); 504 var pointer = 0; 505 var count_delete = 0; 506 var count_insert = 0; 507 var text_delete = ''; 508 var text_insert = ''; 509 var commonlength; 510 511 while (pointer < diffs.length) { 512 switch (diffs[pointer][0]) { 513 case DIFF_INSERT: 514 count_insert++; 515 text_insert += diffs[pointer][1]; 516 pointer++; 517 break; 518 519 case DIFF_DELETE: 520 count_delete++; 521 text_delete += diffs[pointer][1]; 522 pointer++; 523 break; 524 525 case DIFF_EQUAL: 526 // Upon reaching an equality, check for prior redundancies. 527 if (count_delete + count_insert > 1) { 528 if (count_delete !== 0 && count_insert !== 0) { 529 // Factor out any common prefixies. 530 commonlength = diff_commonPrefix(text_insert, text_delete); 531 532 if (commonlength !== 0) { 533 if ( 534 pointer - count_delete - count_insert > 0 && 535 diffs[pointer - count_delete - count_insert - 1][0] == 536 DIFF_EQUAL 537 ) { 538 diffs[ 539 pointer - count_delete - count_insert - 1 540 ][1] += text_insert.substring(0, commonlength); 541 } else { 542 diffs.splice( 543 0, 544 0, 545 new Diff(DIFF_EQUAL, text_insert.substring(0, commonlength)) 546 ); 547 pointer++; 548 } 549 550 text_insert = text_insert.substring(commonlength); 551 text_delete = text_delete.substring(commonlength); 552 } // Factor out any common suffixies. 553 554 commonlength = diff_commonSuffix(text_insert, text_delete); 555 556 if (commonlength !== 0) { 557 diffs[pointer][1] = 558 text_insert.substring(text_insert.length - commonlength) + 559 diffs[pointer][1]; 560 text_insert = text_insert.substring( 561 0, 562 text_insert.length - commonlength 563 ); 564 text_delete = text_delete.substring( 565 0, 566 text_delete.length - commonlength 567 ); 568 } 569 } // Delete the offending records and add the merged ones. 570 571 pointer -= count_delete + count_insert; 572 diffs.splice(pointer, count_delete + count_insert); 573 574 if (text_delete.length) { 575 diffs.splice(pointer, 0, new Diff(DIFF_DELETE, text_delete)); 576 pointer++; 577 } 578 579 if (text_insert.length) { 580 diffs.splice(pointer, 0, new Diff(DIFF_INSERT, text_insert)); 581 pointer++; 582 } 583 584 pointer++; 585 } else if (pointer !== 0 && diffs[pointer - 1][0] == DIFF_EQUAL) { 586 // Merge this equality with the previous one. 587 diffs[pointer - 1][1] += diffs[pointer][1]; 588 diffs.splice(pointer, 1); 589 } else { 590 pointer++; 591 } 592 593 count_insert = 0; 594 count_delete = 0; 595 text_delete = ''; 596 text_insert = ''; 597 break; 598 } 599 } 600 601 if (diffs[diffs.length - 1][1] === '') { 602 diffs.pop(); // Remove the dummy entry at the end. 603 } // Second pass: look for single edits surrounded on both sides by equalities 604 // which can be shifted sideways to eliminate an equality. 605 // e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC 606 607 var changes = false; 608 pointer = 1; // Intentionally ignore the first and last element (don't need checking). 609 610 while (pointer < diffs.length - 1) { 611 if ( 612 diffs[pointer - 1][0] == DIFF_EQUAL && 613 diffs[pointer + 1][0] == DIFF_EQUAL 614 ) { 615 // This is a single edit surrounded by equalities. 616 if ( 617 diffs[pointer][1].substring( 618 diffs[pointer][1].length - diffs[pointer - 1][1].length 619 ) == diffs[pointer - 1][1] 620 ) { 621 // Shift the edit over the previous equality. 622 diffs[pointer][1] = 623 diffs[pointer - 1][1] + 624 diffs[pointer][1].substring( 625 0, 626 diffs[pointer][1].length - diffs[pointer - 1][1].length 627 ); 628 diffs[pointer + 1][1] = diffs[pointer - 1][1] + diffs[pointer + 1][1]; 629 diffs.splice(pointer - 1, 1); 630 changes = true; 631 } else if ( 632 diffs[pointer][1].substring(0, diffs[pointer + 1][1].length) == 633 diffs[pointer + 1][1] 634 ) { 635 // Shift the edit over the next equality. 636 diffs[pointer - 1][1] += diffs[pointer + 1][1]; 637 diffs[pointer][1] = 638 diffs[pointer][1].substring(diffs[pointer + 1][1].length) + 639 diffs[pointer + 1][1]; 640 diffs.splice(pointer + 1, 1); 641 changes = true; 642 } 643 } 644 645 pointer++; 646 } // If shifts were made, the diff needs reordering and another shift sweep. 647 648 if (changes) { 649 diff_cleanupMerge(diffs); 650 } 651 };