cleanupSemantic.js
  1  'use strict';
  2  
// Flag the exports object with `__esModule: true`.
Object.defineProperty(exports, '__esModule', {
  value: true
});
// Create every export slot up front with an undefined value. Each slot is
// assigned its real value further down, after the binding it exposes has
// been defined.
exports.cleanupSemantic = exports.DIFF_INSERT = exports.DIFF_DELETE = exports.DIFF_EQUAL = exports.Diff = void 0;
  7  
  8  function _defineProperty(obj, key, value) {
  9    if (key in obj) {
 10      Object.defineProperty(obj, key, {
 11        value: value,
 12        enumerable: true,
 13        configurable: true,
 14        writable: true
 15      });
 16    } else {
 17      obj[key] = value;
 18    }
 19    return obj;
 20  }
 21  
 22  /**
 23   * Diff Match and Patch
 24   * Copyright 2018 The diff-match-patch Authors.
 25   * https://github.com/google/diff-match-patch
 26   *
 27   * Licensed under the Apache License, Version 2.0 (the "License");
 28   * you may not use this file except in compliance with the License.
 29   * You may obtain a copy of the License at
 30   *
 31   *   http://www.apache.org/licenses/LICENSE-2.0
 32   *
 33   * Unless required by applicable law or agreed to in writing, software
 34   * distributed under the License is distributed on an "AS IS" BASIS,
 35   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 36   * See the License for the specific language governing permissions and
 37   * limitations under the License.
 38   */
 39  
 40  /**
 41   * @fileoverview Computes the difference between two texts to create a patch.
 42   * Applies the patch onto another text, allowing for errors.
 43   * @author fraser@google.com (Neil Fraser)
 44   */
 45  
 46  /**
 47   * CHANGES by pedrottimark to diff_match_patch_uncompressed.ts file:
 48   *
 49   * 1. Delete anything not needed to use diff_cleanupSemantic method
 50   * 2. Convert from prototype properties to var declarations
 51   * 3. Convert Diff to class from constructor and prototype
 52   * 4. Add type annotations for arguments and return values
 53   * 5. Add exports
 54   */
 55  
 56  /**
 57   * The data structure representing a diff is an array of tuples:
 58   * [[DIFF_DELETE, 'Hello'], [DIFF_INSERT, 'Goodbye'], [DIFF_EQUAL, ' world.']]
 59   * which means: delete 'Hello', add 'Goodbye' and keep ' world.'
 60   */
// Operation code for text that is deleted.
var DIFF_DELETE = -1;
exports.DIFF_DELETE = DIFF_DELETE;
// Operation code for text that is added.
var DIFF_INSERT = 1;
exports.DIFF_INSERT = DIFF_INSERT;
// Operation code for text that is kept unchanged.
var DIFF_EQUAL = 0;

exports.DIFF_EQUAL = DIFF_EQUAL;

/**
 * Class representing one diff tuple.
 * Attempts to look like a two-element array (which is what this used to be).
 * @param {number} op Operation, one of: DIFF_DELETE, DIFF_INSERT, DIFF_EQUAL.
 * @param {string} text Text to be deleted, inserted, or retained.
 * @constructor
 */
 75  
 76  class Diff {
 77    constructor(op, text) {
 78      _defineProperty(this, 0, void 0);
 79  
 80      _defineProperty(this, 1, void 0);
 81  
 82      this[0] = op;
 83      this[1] = text;
 84    }
 85  }
// Expose the Diff class to consumers of this module.
exports.Diff = Diff;

/**
 * Determine the common prefix of two strings.
 * @param {string} text1 First string.
 * @param {string} text2 Second string.
 * @return {number} The number of characters common to the start of each
 *     string.
 */
 95  
 96  var diff_commonPrefix = function (text1, text2) {
 97    // Quick check for common null cases.
 98    if (!text1 || !text2 || text1.charAt(0) != text2.charAt(0)) {
 99      return 0;
100    } // Binary search.
101    // Performance analysis: https://neil.fraser.name/news/2007/10/09/
102  
103    var pointermin = 0;
104    var pointermax = Math.min(text1.length, text2.length);
105    var pointermid = pointermax;
106    var pointerstart = 0;
107  
108    while (pointermin < pointermid) {
109      if (
110        text1.substring(pointerstart, pointermid) ==
111        text2.substring(pointerstart, pointermid)
112      ) {
113        pointermin = pointermid;
114        pointerstart = pointermin;
115      } else {
116        pointermax = pointermid;
117      }
118  
119      pointermid = Math.floor((pointermax - pointermin) / 2 + pointermin);
120    }
121  
122    return pointermid;
123  };
/**
 * Count how many trailing characters two strings share.
 * @param {string} text1 First string.
 * @param {string} text2 Second string.
 * @return {number} The number of characters common to the end of each string;
 *     0 when either argument is falsy (for example an empty string).
 */
var diff_commonSuffix = function (text1, text2) {
  if (!text1 || !text2) {
    return 0;
  }

  const size1 = text1.length;
  const size2 = text2.length;

  // Cheap rejection: different last characters mean no common suffix.
  if (text1.charAt(size1 - 1) !== text2.charAt(size2 - 1)) {
    return 0;
  }

  // Binary search on the suffix length.
  // Performance analysis: https://neil.fraser.name/news/2007/10/09/
  let matched = 0; // Suffix length already confirmed to be identical.
  let ceiling = Math.min(size1, size2);
  let probe = ceiling;

  while (matched < probe) {
    // Compare only the slice just in front of the confirmed suffix.
    const slice1 = text1.substring(size1 - probe, size1 - matched);
    const slice2 = text2.substring(size2 - probe, size2 - matched);

    if (slice1 === slice2) {
      matched = probe;
    } else {
      ceiling = probe;
    }

    probe = Math.floor((ceiling - matched) / 2 + matched);
  }

  return probe;
};
/**
 * Measure how much of the end of one string reappears at the start of
 * another.
 * @param {string} text1 First string.
 * @param {string} text2 Second string.
 * @return {number} The number of characters common to the end of the first
 *     string and the start of the second string.
 * @private
 */
var diff_commonOverlap_ = function (text1, text2) {
  const size1 = text1.length;
  const size2 = text2.length;

  // An empty side cannot overlap with anything.
  if (size1 === 0 || size2 === 0) {
    return 0;
  }

  // The overlap can never exceed the shorter string, so only the tail of
  // text1 and the head of text2 of that length are examined.
  const span = Math.min(size1, size2);
  const tail = size1 > size2 ? text1.substring(size1 - size2) : text1;
  const head = size1 < size2 ? text2.substring(0, size1) : text2;

  // Worst case for the search below: the two windows are identical.
  if (tail === head) {
    return span;
  }

  // Start by looking for a single character match
  // and increase length until no match is found.
  // Performance analysis: https://neil.fraser.name/news/2010/11/04/
  let longest = 0;
  let size = 1;

  for (;;) {
    const offset = head.indexOf(tail.substring(span - size));

    if (offset === -1) {
      return longest;
    }

    // The candidate was found `offset` characters into head, so a real
    // overlap has to be at least that much longer.
    size += offset;

    if (
      offset === 0 ||
      tail.substring(span - size) === head.substring(0, size)
    ) {
      longest = size;
      size += 1;
    }
  }
};
/**
 * Reduce the number of edits by eliminating semantically trivial equalities.
 *
 * The array is modified in place and nothing is returned. The work happens
 * in three stages:
 *   1. Scan the tuples and replace every equality that is no longer than the
 *      edits on both sides of it with a delete/insert pair; if anything was
 *      replaced, diff_cleanupMerge is run.
 *   2. Run diff_cleanupSemanticLossless.
 *   3. Scan for a deletion immediately followed by an insertion and, when
 *      the two overlap enough, split the shared text out as an equality.
 * @param {!Array.<!diff_match_patch.Diff>} diffs Array of diff tuples.
 */

var diff_cleanupSemantic = function (diffs) {
  var changes = false;
  var equalities = []; // Stack of indices where equalities are found.

  var equalitiesLength = 0; // Keeping our own length var is faster in JS.

  /** @type {?string} */

  var lastEquality = null; // Always equal to diffs[equalities[equalitiesLength - 1]][1]

  var pointer = 0; // Index of current position.

  // Number of characters that changed prior to the equality.
  var length_insertions1 = 0;
  var length_deletions1 = 0;

  // Number of characters that changed after the equality.
  var length_insertions2 = 0;
  var length_deletions2 = 0;

  while (pointer < diffs.length) {
    if (diffs[pointer][0] == DIFF_EQUAL) {
      // Equality found: remember its index and text. The edits counted so far
      // become the "before" totals and the "after" totals start from zero.
      equalities[equalitiesLength++] = pointer;
      length_insertions1 = length_insertions2;
      length_deletions1 = length_deletions2;
      length_insertions2 = 0;
      length_deletions2 = 0;
      lastEquality = diffs[pointer][1];
    } else {
      // An insertion or deletion: add its length to the "after" totals.
      if (diffs[pointer][0] == DIFF_INSERT) {
        length_insertions2 += diffs[pointer][1].length;
      } else {
        length_deletions2 += diffs[pointer][1].length;
      }

      // Eliminate an equality that is smaller or equal to the edits on both
      // sides of it. An empty or null lastEquality never qualifies.
      if (
        lastEquality &&
        lastEquality.length <=
          Math.max(length_insertions1, length_deletions1) &&
        lastEquality.length <= Math.max(length_insertions2, length_deletions2)
      ) {
        // Duplicate record: put a deletion carrying the equality's text in
        // front of the equality.
        diffs.splice(
          equalities[equalitiesLength - 1],
          0,
          new Diff(DIFF_DELETE, lastEquality)
        );

        // Change second copy (the original equality, now one slot further
        // along) to insert.
        diffs[equalities[equalitiesLength - 1] + 1][0] = DIFF_INSERT;

        // Throw away the equality we just deleted.
        equalitiesLength--;

        // Throw away the previous equality (it needs to be reevaluated).
        // NOTE(review): both decrements are unconditional, so when only one
        // equality was on the stack equalitiesLength becomes -1 and the next
        // equality found is stored under the key -1.
        equalitiesLength--;

        // Rewind to the last equality still on the stack, or to before the
        // first tuple when none is left (the pointer++ below moves on from
        // there).
        pointer = equalitiesLength > 0 ? equalities[equalitiesLength - 1] : -1;

        // Reset the counters.
        length_insertions1 = 0;
        length_deletions1 = 0;
        length_insertions2 = 0;
        length_deletions2 = 0;
        lastEquality = null;
        changes = true;
      }
    }

    pointer++;
  }

  // Normalize the diff.
  if (changes) {
    diff_cleanupMerge(diffs);
  }

  diff_cleanupSemanticLossless(diffs);

  // Find any overlaps between deletions and insertions.
  // e.g: <del>abcxxx</del><ins>xxxdef</ins>
  //   -> <del>abc</del>xxx<ins>def</ins>
  // e.g: <del>xxxabc</del><ins>defxxx</ins>
  //   -> <ins>def</ins>xxx<del>abc</del>
  // Only extract an overlap if it is at least half as long as the deletion
  // or the insertion.
  pointer = 1;

  while (pointer < diffs.length) {
    if (
      diffs[pointer - 1][0] == DIFF_DELETE &&
      diffs[pointer][0] == DIFF_INSERT
    ) {
      var deletion = diffs[pointer - 1][1];
      var insertion = diffs[pointer][1];
      // overlap_length1: end of the deletion matching the start of the
      // insertion. overlap_length2: end of the insertion matching the start
      // of the deletion.
      var overlap_length1 = diff_commonOverlap_(deletion, insertion);
      var overlap_length2 = diff_commonOverlap_(insertion, deletion);

      if (overlap_length1 >= overlap_length2) {
        if (
          overlap_length1 >= deletion.length / 2 ||
          overlap_length1 >= insertion.length / 2
        ) {
          // Overlap found.  Insert an equality and trim the surrounding edits.
          diffs.splice(
            pointer,
            0,
            new Diff(DIFF_EQUAL, insertion.substring(0, overlap_length1))
          );
          diffs[pointer - 1][1] = deletion.substring(
            0,
            deletion.length - overlap_length1
          );
          diffs[pointer + 1][1] = insertion.substring(overlap_length1);
          // Step over the equality that was just inserted.
          pointer++;
        }
      } else {
        if (
          overlap_length2 >= deletion.length / 2 ||
          overlap_length2 >= insertion.length / 2
        ) {
          // Reverse overlap found.
          // Insert an equality and swap and trim the surrounding edits.
          diffs.splice(
            pointer,
            0,
            new Diff(DIFF_EQUAL, deletion.substring(0, overlap_length2))
          );
          diffs[pointer - 1][0] = DIFF_INSERT;
          diffs[pointer - 1][1] = insertion.substring(
            0,
            insertion.length - overlap_length2
          );
          diffs[pointer + 1][0] = DIFF_DELETE;
          diffs[pointer + 1][1] = deletion.substring(overlap_length2);
          // Step over the equality that was just inserted.
          pointer++;
        }
      }

      // Step over the second edit of the pair that was just examined.
      pointer++;
    }

    pointer++;
  }
};
// Expose diff_cleanupSemantic to consumers under the name `cleanupSemantic`.
exports.cleanupSemantic = diff_cleanupSemantic;

/**
 * Look for single edits surrounded on both sides by equalities
 * which can be shifted sideways to align the edit to a word boundary.
 * e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
 * @param {!Array.<!diff_match_patch.Diff>} diffs Array of diff tuples.
 */
369  
var diff_cleanupSemanticLossless = function (diffs) {
  // Slides every single edit that sits between two equalities sideways to the
  // position whose two boundaries score best (see the scoring helper below).
  // `diffs` is modified in place; nothing is returned.

  /**
   * Given two strings, compute a score representing whether the internal
   * boundary falls on logical boundaries.
   * Scores range from 6 (best) to 0 (worst).
   * Closure, but does not reference any external variables.
   * @param {string} one First string.
   * @param {string} two Second string.
   * @return {number} The score.
   * @private
   */
  function diff_cleanupSemanticScore_(one, two) {
    if (!one || !two) {
      // Edges are the best.
      return 6;
    }

    // Each port of this function behaves slightly differently due to
    // subtle differences in each language's definition of things like
    // 'whitespace'.  Since this function's purpose is largely cosmetic,
    // the choice has been made to use each language's native features
    // rather than force total conformity.
    var char1 = one.charAt(one.length - 1);
    var char2 = two.charAt(0);
    // Each check is only attempted when the broader one before it matched.
    var nonAlphaNumeric1 = char1.match(nonAlphaNumericRegex_);
    var nonAlphaNumeric2 = char2.match(nonAlphaNumericRegex_);
    var whitespace1 = nonAlphaNumeric1 && char1.match(whitespaceRegex_);
    var whitespace2 = nonAlphaNumeric2 && char2.match(whitespaceRegex_);
    var lineBreak1 = whitespace1 && char1.match(linebreakRegex_);
    var lineBreak2 = whitespace2 && char2.match(linebreakRegex_);
    var blankLine1 = lineBreak1 && one.match(blanklineEndRegex_);
    var blankLine2 = lineBreak2 && two.match(blanklineStartRegex_);

    if (blankLine1 || blankLine2) {
      // Five points for blank lines.
      return 5;
    } else if (lineBreak1 || lineBreak2) {
      // Four points for line breaks.
      return 4;
    } else if (nonAlphaNumeric1 && !whitespace1 && whitespace2) {
      // Three points for end of sentences.
      return 3;
    } else if (whitespace1 || whitespace2) {
      // Two points for whitespace.
      return 2;
    } else if (nonAlphaNumeric1 || nonAlphaNumeric2) {
      // One point for non-alphanumeric.
      return 1;
    }

    return 0;
  }

  var pointer = 1; // Intentionally ignore the first and last element (don't need checking).

  while (pointer < diffs.length - 1) {
    if (
      diffs[pointer - 1][0] == DIFF_EQUAL &&
      diffs[pointer + 1][0] == DIFF_EQUAL
    ) {
      // This is a single edit surrounded by equalities.
      var equality1 = diffs[pointer - 1][1];
      var edit = diffs[pointer][1];
      var equality2 = diffs[pointer + 1][1];

      // First, shift the edit as far left as possible.
      var commonOffset = diff_commonSuffix(equality1, edit);

      if (commonOffset) {
        var commonString = edit.substring(edit.length - commonOffset);
        equality1 = equality1.substring(0, equality1.length - commonOffset);
        edit = commonString + edit.substring(0, edit.length - commonOffset);
        equality2 = commonString + equality2;
      }

      // Second, step character by character right, looking for the best fit.
      var bestEquality1 = equality1;
      var bestEdit = edit;
      var bestEquality2 = equality2;
      var bestScore =
        diff_cleanupSemanticScore_(equality1, edit) +
        diff_cleanupSemanticScore_(edit, equality2);

      // The edit can slide one step right while its first character equals
      // the first character of the following equality. charAt(0) yields ''
      // for an empty string, so without the length check an empty edit next
      // to an empty equality would compare '' === '' forever.
      while (edit.length > 0 && edit.charAt(0) === equality2.charAt(0)) {
        equality1 += edit.charAt(0);
        edit = edit.substring(1) + equality2.charAt(0);
        equality2 = equality2.substring(1);
        var score =
          diff_cleanupSemanticScore_(equality1, edit) +
          diff_cleanupSemanticScore_(edit, equality2);

        // The >= encourages trailing rather than leading whitespace on edits.
        if (score >= bestScore) {
          bestScore = score;
          bestEquality1 = equality1;
          bestEdit = edit;
          bestEquality2 = equality2;
        }
      }

      if (diffs[pointer - 1][1] != bestEquality1) {
        // We have an improvement, save it back to the diff.
        if (bestEquality1) {
          diffs[pointer - 1][1] = bestEquality1;
        } else {
          // The leading equality was used up entirely: drop it and follow
          // the edit to its new index.
          diffs.splice(pointer - 1, 1);
          pointer--;
        }

        diffs[pointer][1] = bestEdit;

        if (bestEquality2) {
          diffs[pointer + 1][1] = bestEquality2;
        } else {
          // The trailing equality was used up entirely: drop it.
          diffs.splice(pointer + 1, 1);
          pointer--;
        }
      }
    }

    pointer++;
  }
}; // Define some regex patterns for matching boundaries.
489  
// Matches a single character outside a-z, A-Z and 0-9.
var nonAlphaNumericRegex_ = /[^a-zA-Z0-9]/;
// Matches a whitespace character.
var whitespaceRegex_ = /\s/;
// Matches a carriage return or a line feed.
var linebreakRegex_ = /[\r\n]/;
// Matches text ending in a blank line: a line feed, an optional carriage
// return, then a final line feed.
var blanklineEndRegex_ = /\n\r?\n$/;
// Matches text starting with a blank line: two line feeds, each optionally
// preceded by a carriage return.
var blanklineStartRegex_ = /^\r?\n\r?\n/;
/**
 * Reorder and merge like edit sections.  Merge equalities.
 * Any edit section can move as long as it doesn't cross an equality.
 *
 * The array is modified in place and nothing is returned. A first pass
 * collapses every run of insertions/deletions between two equalities into at
 * most one deletion followed by one insertion and joins adjacent equalities.
 * A second pass shifts single edits sideways over a neighbouring equality;
 * when that changed anything the function calls itself again.
 * @param {!Array.<!diff_match_patch.Diff>} diffs Array of diff tuples.
 */

var diff_cleanupMerge = function (diffs) {
  // Add a dummy entry at the end so that a trailing run of edits is also
  // followed by an equality and gets processed by the DIFF_EQUAL case.
  diffs.push(new Diff(DIFF_EQUAL, ''));
  var pointer = 0;
  // Number of deletions / insertions seen since the last equality, and their
  // concatenated texts.
  var count_delete = 0;
  var count_insert = 0;
  var text_delete = '';
  var text_insert = '';
  var commonlength;

  // NOTE(review): the switch has no default case, so a tuple whose operation
  // is none of the three constants would leave `pointer` unchanged.
  while (pointer < diffs.length) {
    switch (diffs[pointer][0]) {
      case DIFF_INSERT:
        count_insert++;
        text_insert += diffs[pointer][1];
        pointer++;
        break;

      case DIFF_DELETE:
        count_delete++;
        text_delete += diffs[pointer][1];
        pointer++;
        break;

      case DIFF_EQUAL:
        // Upon reaching an equality, check for prior redundancies.
        if (count_delete + count_insert > 1) {
          if (count_delete !== 0 && count_insert !== 0) {
            // Factor out any common prefixes.
            commonlength = diff_commonPrefix(text_insert, text_delete);

            if (commonlength !== 0) {
              if (
                pointer - count_delete - count_insert > 0 &&
                diffs[pointer - count_delete - count_insert - 1][0] ==
                  DIFF_EQUAL
              ) {
                // An equality sits right before the run: append the shared
                // prefix to it.
                diffs[
                  pointer - count_delete - count_insert - 1
                ][1] += text_insert.substring(0, commonlength);
              } else {
                // Otherwise insert a new equality holding the shared prefix
                // at index 0; everything moves one slot to the right.
                diffs.splice(
                  0,
                  0,
                  new Diff(DIFF_EQUAL, text_insert.substring(0, commonlength))
                );
                pointer++;
              }

              text_insert = text_insert.substring(commonlength);
              text_delete = text_delete.substring(commonlength);
            }

            // Factor out any common suffixes by prepending them to the
            // equality at `pointer`.
            commonlength = diff_commonSuffix(text_insert, text_delete);

            if (commonlength !== 0) {
              diffs[pointer][1] =
                text_insert.substring(text_insert.length - commonlength) +
                diffs[pointer][1];
              text_insert = text_insert.substring(
                0,
                text_insert.length - commonlength
              );
              text_delete = text_delete.substring(
                0,
                text_delete.length - commonlength
              );
            }
          }

          // Delete the offending records and add the merged ones
          // (deletion first, then insertion; empty texts are skipped).
          pointer -= count_delete + count_insert;
          diffs.splice(pointer, count_delete + count_insert);

          if (text_delete.length) {
            diffs.splice(pointer, 0, new Diff(DIFF_DELETE, text_delete));
            pointer++;
          }

          if (text_insert.length) {
            diffs.splice(pointer, 0, new Diff(DIFF_INSERT, text_insert));
            pointer++;
          }

          // Step past the equality that ended the run.
          pointer++;
        } else if (pointer !== 0 && diffs[pointer - 1][0] == DIFF_EQUAL) {
          // Merge this equality with the previous one. The pointer is not
          // advanced because the element at `pointer` has been removed.
          diffs[pointer - 1][1] += diffs[pointer][1];
          diffs.splice(pointer, 1);
        } else {
          pointer++;
        }

        // Start collecting the next run of edits from scratch.
        count_insert = 0;
        count_delete = 0;
        text_delete = '';
        text_insert = '';
        break;
    }
  }

  if (diffs[diffs.length - 1][1] === '') {
    diffs.pop(); // Remove the dummy entry at the end.
  }

  // Second pass: look for single edits surrounded on both sides by equalities
  // which can be shifted sideways to eliminate an equality.
  // e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
  var changes = false;
  pointer = 1; // Intentionally ignore the first and last element (don't need checking).

  while (pointer < diffs.length - 1) {
    if (
      diffs[pointer - 1][0] == DIFF_EQUAL &&
      diffs[pointer + 1][0] == DIFF_EQUAL
    ) {
      // This is a single edit surrounded by equalities.
      if (
        diffs[pointer][1].substring(
          diffs[pointer][1].length - diffs[pointer - 1][1].length
        ) == diffs[pointer - 1][1]
      ) {
        // The edit ends with the whole previous equality.
        // Shift the edit over the previous equality: the equality's text
        // moves to the front of the edit and of the next equality, and the
        // previous equality is removed.
        diffs[pointer][1] =
          diffs[pointer - 1][1] +
          diffs[pointer][1].substring(
            0,
            diffs[pointer][1].length - diffs[pointer - 1][1].length
          );
        diffs[pointer + 1][1] = diffs[pointer - 1][1] + diffs[pointer + 1][1];
        diffs.splice(pointer - 1, 1);
        changes = true;
      } else if (
        diffs[pointer][1].substring(0, diffs[pointer + 1][1].length) ==
        diffs[pointer + 1][1]
      ) {
        // The edit starts with the whole next equality.
        // Shift the edit over the next equality: the equality's text moves
        // to the end of the previous equality and of the edit, and the next
        // equality is removed.
        diffs[pointer - 1][1] += diffs[pointer + 1][1];
        diffs[pointer][1] =
          diffs[pointer][1].substring(diffs[pointer + 1][1].length) +
          diffs[pointer + 1][1];
        diffs.splice(pointer + 1, 1);
        changes = true;
      }
    }

    pointer++;
  }

  // If shifts were made, the diff needs reordering and another shift sweep.
  if (changes) {
    diff_cleanupMerge(diffs);
  }
};
651  };