difflib.js
1 // Generated by CoffeeScript 1.3.1 2 3 /* 4 Module difflib -- helpers for computing deltas between objects. 5 6 Function getCloseMatches(word, possibilities, n=3, cutoff=0.6): 7 Use SequenceMatcher to return list of the best "good enough" matches. 8 9 Function contextDiff(a, b): 10 For two lists of strings, return a delta in context diff format. 11 12 Function ndiff(a, b): 13 Return a delta: the difference between `a` and `b` (lists of strings). 14 15 Function restore(delta, which): 16 Return one of the two sequences that generated an ndiff delta. 17 18 Function unifiedDiff(a, b): 19 For two lists of strings, return a delta in unified diff format. 20 21 Class SequenceMatcher: 22 A flexible class for comparing pairs of sequences of any type. 23 24 Class Differ: 25 For producing human-readable deltas from sequences of lines of text. 26 */ 27 28 29 (function() { 30 var Differ, Heap, IS_CHARACTER_JUNK, IS_LINE_JUNK, SequenceMatcher, assert, contextDiff, floor, getCloseMatches, max, min, ndiff, restore, unifiedDiff, _any, _arrayCmp, _calculateRatio, _countLeading, _formatRangeContext, _formatRangeUnified, _has, 31 __indexOf = [].indexOf || function(item) { for (var i = 0, l = this.length; i < l; i++) { if (i in this && this[i] === item) return i; } return -1; }; 32 33 floor = Math.floor, max = Math.max, min = Math.min; 34 35 Heap = require('heap'); 36 37 assert = require('assert'); 38 39 _calculateRatio = function(matches, length) { 40 if (length) { 41 return 2.0 * matches / length; 42 } else { 43 return 1.0; 44 } 45 }; 46 47 _arrayCmp = function(a, b) { 48 var i, la, lb, _i, _ref, _ref1; 49 _ref = [a.length, b.length], la = _ref[0], lb = _ref[1]; 50 for (i = _i = 0, _ref1 = min(la, lb); 0 <= _ref1 ? _i < _ref1 : _i > _ref1; i = 0 <= _ref1 ? ++_i : --_i) { 51 if (a[i] < b[i]) { 52 return -1; 53 } 54 if (a[i] > b[i]) { 55 return 1; 56 } 57 } 58 return la - lb; 59 }; 60 61 _has = function(obj, key) { 62 return Object.prototype.hasOwnProperty.call(obj, key); 63 }; 64 65 _any = function(items) { 66 var item, _i, _len; 67 for (_i = 0, _len = items.length; _i < _len; _i++) { 68 item = items[_i]; 69 if (item) { 70 return true; 71 } 72 } 73 return false; 74 }; 75 76 SequenceMatcher = (function() { 77 78 SequenceMatcher.name = 'SequenceMatcher'; 79 80 /* 81 SequenceMatcher is a flexible class for comparing pairs of sequences of 82 any type, so long as the sequence elements are hashable. The basic 83 algorithm predates, and is a little fancier than, an algorithm 84 published in the late 1980's by Ratcliff and Obershelp under the 85 hyperbolic name "gestalt pattern matching". The basic idea is to find 86 the longest contiguous matching subsequence that contains no "junk" 87 elements (R-O doesn't address junk). The same idea is then applied 88 recursively to the pieces of the sequences to the left and to the right 89 of the matching subsequence. This does not yield minimal edit 90 sequences, but does tend to yield matches that "look right" to people. 91 92 SequenceMatcher tries to compute a "human-friendly diff" between two 93 sequences. Unlike e.g. UNIX(tm) diff, the fundamental notion is the 94 longest *contiguous* & junk-free matching subsequence. That's what 95 catches peoples' eyes. The Windows(tm) windiff has another interesting 96 notion, pairing up elements that appear uniquely in each sequence. 97 That, and the method here, appear to yield more intuitive difference 98 reports than does diff. This method appears to be the least vulnerable 99 to synching up on blocks of "junk lines", though (like blank lines in 100 ordinary text files, or maybe "<P>" lines in HTML files). That may be 101 because this is the only method of the 3 that has a *concept* of 102 "junk" <wink>. 103 104 Example, comparing two strings, and considering blanks to be "junk": 105 106 >>> isjunk = (c) -> c is ' ' 107 >>> s = new SequenceMatcher(isjunk, 108 'private Thread currentThread;', 109 'private volatile Thread currentThread;') 110 111 .ratio() returns a float in [0, 1], measuring the "similarity" of the 112 sequences. As a rule of thumb, a .ratio() value over 0.6 means the 113 sequences are close matches: 114 115 >>> s.ratio().toPrecision(3) 116 '0.866' 117 118 If you're only interested in where the sequences match, 119 .getMatchingBlocks() is handy: 120 121 >>> for [a, b, size] in s.getMatchingBlocks() 122 ... console.log("a[#{a}] and b[#{b}] match for #{size} elements"); 123 a[0] and b[0] match for 8 elements 124 a[8] and b[17] match for 21 elements 125 a[29] and b[38] match for 0 elements 126 127 Note that the last tuple returned by .get_matching_blocks() is always a 128 dummy, (len(a), len(b), 0), and this is the only case in which the last 129 tuple element (number of elements matched) is 0. 130 131 If you want to know how to change the first sequence into the second, 132 use .get_opcodes(): 133 134 >>> for [op, a1, a2, b1, b2] in s.getOpcodes() 135 ... console.log "#{op} a[#{a1}:#{a2}] b[#{b1}:#{b2}]" 136 equal a[0:8] b[0:8] 137 insert a[8:8] b[8:17] 138 equal a[8:29] b[17:38] 139 140 See the Differ class for a fancy human-friendly file differencer, which 141 uses SequenceMatcher both to compare sequences of lines, and to compare 142 sequences of characters within similar (near-matching) lines. 143 144 See also function getCloseMatches() in this module, which shows how 145 simple code building on SequenceMatcher can be used to do useful work. 146 147 Timing: Basic R-O is cubic time worst case and quadratic time expected 148 case. SequenceMatcher is quadratic time for the worst case and has 149 expected-case behavior dependent in a complicated way on how many 150 elements the sequences have in common; best case time is linear. 151 152 Methods: 153 154 constructor(isjunk=null, a='', b='') 155 Construct a SequenceMatcher. 156 157 setSeqs(a, b) 158 Set the two sequences to be compared. 159 160 setSeq1(a) 161 Set the first sequence to be compared. 162 163 setSeq2(b) 164 Set the second sequence to be compared. 165 166 findLongestMatch(alo, ahi, blo, bhi) 167 Find longest matching block in a[alo:ahi] and b[blo:bhi]. 168 169 getMatchingBlocks() 170 Return list of triples describing matching subsequences. 171 172 getOpcodes() 173 Return list of 5-tuples describing how to turn a into b. 174 175 ratio() 176 Return a measure of the sequences' similarity (float in [0,1]). 177 178 quickRatio() 179 Return an upper bound on .ratio() relatively quickly. 180 181 realQuickRatio() 182 Return an upper bound on ratio() very quickly. 183 */ 184 185 186 function SequenceMatcher(isjunk, a, b, autojunk) { 187 this.isjunk = isjunk; 188 if (a == null) { 189 a = ''; 190 } 191 if (b == null) { 192 b = ''; 193 } 194 this.autojunk = autojunk != null ? autojunk : true; 195 /* 196 Construct a SequenceMatcher. 197 198 Optional arg isjunk is null (the default), or a one-argument 199 function that takes a sequence element and returns true iff the 200 element is junk. Null is equivalent to passing "(x) -> 0", i.e. 201 no elements are considered to be junk. For example, pass 202 (x) -> x in ' \t' 203 if you're comparing lines as sequences of characters, and don't 204 want to synch up on blanks or hard tabs. 205 206 Optional arg a is the first of two sequences to be compared. By 207 default, an empty string. The elements of a must be hashable. See 208 also .setSeqs() and .setSeq1(). 209 210 Optional arg b is the second of two sequences to be compared. By 211 default, an empty string. The elements of b must be hashable. See 212 also .setSeqs() and .setSeq2(). 213 214 Optional arg autojunk should be set to false to disable the 215 "automatic junk heuristic" that treats popular elements as junk 216 (see module documentation for more information). 217 */ 218 219 this.a = this.b = null; 220 this.setSeqs(a, b); 221 } 222 223 SequenceMatcher.prototype.setSeqs = function(a, b) { 224 /* 225 Set the two sequences to be compared. 226 227 >>> s = new SequenceMatcher() 228 >>> s.setSeqs('abcd', 'bcde') 229 >>> s.ratio() 230 0.75 231 */ 232 this.setSeq1(a); 233 return this.setSeq2(b); 234 }; 235 236 SequenceMatcher.prototype.setSeq1 = function(a) { 237 /* 238 Set the first sequence to be compared. 239 240 The second sequence to be compared is not changed. 241 242 >>> s = new SequenceMatcher(null, 'abcd', 'bcde') 243 >>> s.ratio() 244 0.75 245 >>> s.setSeq1('bcde') 246 >>> s.ratio() 247 1.0 248 249 SequenceMatcher computes and caches detailed information about the 250 second sequence, so if you want to compare one sequence S against 251 many sequences, use .setSeq2(S) once and call .setSeq1(x) 252 repeatedly for each of the other sequences. 253 254 See also setSeqs() and setSeq2(). 255 */ 256 if (a === this.a) { 257 return; 258 } 259 this.a = a; 260 return this.matchingBlocks = this.opcodes = null; 261 }; 262 263 SequenceMatcher.prototype.setSeq2 = function(b) { 264 /* 265 Set the second sequence to be compared. 266 267 The first sequence to be compared is not changed. 268 269 >>> s = new SequenceMatcher(null, 'abcd', 'bcde') 270 >>> s.ratio() 271 0.75 272 >>> s.setSeq2('abcd') 273 >>> s.ratio() 274 1.0 275 276 SequenceMatcher computes and caches detailed information about the 277 second sequence, so if you want to compare one sequence S against 278 many sequences, use .setSeq2(S) once and call .setSeq1(x) 279 repeatedly for each of the other sequences. 280 281 See also setSeqs() and setSeq1(). 282 */ 283 if (b === this.b) { 284 return; 285 } 286 this.b = b; 287 this.matchingBlocks = this.opcodes = null; 288 this.fullbcount = null; 289 return this._chainB(); 290 }; 291 292 SequenceMatcher.prototype._chainB = function() { 293 var b, b2j, elt, i, idxs, indices, isjunk, junk, n, ntest, popular, _i, _j, _len, _len1, _ref; 294 b = this.b; 295 this.b2j = b2j = {}; 296 for (i = _i = 0, _len = b.length; _i < _len; i = ++_i) { 297 elt = b[i]; 298 indices = _has(b2j, elt) ? b2j[elt] : b2j[elt] = []; 299 indices.push(i); 300 } 301 junk = {}; 302 isjunk = this.isjunk; 303 if (isjunk) { 304 _ref = Object.keys(b2j); 305 for (_j = 0, _len1 = _ref.length; _j < _len1; _j++) { 306 elt = _ref[_j]; 307 if (isjunk(elt)) { 308 junk[elt] = true; 309 delete b2j[elt]; 310 } 311 } 312 } 313 popular = {}; 314 n = b.length; 315 if (this.autojunk && n >= 200) { 316 ntest = floor(n / 100) + 1; 317 for (elt in b2j) { 318 idxs = b2j[elt]; 319 if (idxs.length > ntest) { 320 popular[elt] = true; 321 delete b2j[elt]; 322 } 323 } 324 } 325 this.isbjunk = function(b) { 326 return _has(junk, b); 327 }; 328 return this.isbpopular = function(b) { 329 return _has(popular, b); 330 }; 331 }; 332 333 SequenceMatcher.prototype.findLongestMatch = function(alo, ahi, blo, bhi) { 334 /* 335 Find longest matching block in a[alo...ahi] and b[blo...bhi]. 336 337 If isjunk is not defined: 338 339 Return [i,j,k] such that a[i...i+k] is equal to b[j...j+k], where 340 alo <= i <= i+k <= ahi 341 blo <= j <= j+k <= bhi 342 and for all [i',j',k'] meeting those conditions, 343 k >= k' 344 i <= i' 345 and if i == i', j <= j' 346 347 In other words, of all maximal matching blocks, return one that 348 starts earliest in a, and of all those maximal matching blocks that 349 start earliest in a, return the one that starts earliest in b. 350 351 >>> isjunk = (x) -> x is ' ' 352 >>> s = new SequenceMatcher(isjunk, ' abcd', 'abcd abcd') 353 >>> s.findLongestMatch(0, 5, 0, 9) 354 [1, 0, 4] 355 356 >>> s = new SequenceMatcher(null, 'ab', 'c') 357 >>> s.findLongestMatch(0, 2, 0, 1) 358 [0, 0, 0] 359 */ 360 361 var a, b, b2j, besti, bestj, bestsize, i, isbjunk, j, j2len, k, newj2len, _i, _j, _len, _ref, _ref1, _ref2, _ref3, _ref4, _ref5; 362 _ref = [this.a, this.b, this.b2j, this.isbjunk], a = _ref[0], b = _ref[1], b2j = _ref[2], isbjunk = _ref[3]; 363 _ref1 = [alo, blo, 0], besti = _ref1[0], bestj = _ref1[1], bestsize = _ref1[2]; 364 j2len = {}; 365 for (i = _i = alo; alo <= ahi ? _i < ahi : _i > ahi; i = alo <= ahi ? ++_i : --_i) { 366 newj2len = {}; 367 _ref2 = (_has(b2j, a[i]) ? b2j[a[i]] : []); 368 for (_j = 0, _len = _ref2.length; _j < _len; _j++) { 369 j = _ref2[_j]; 370 if (j < blo) { 371 continue; 372 } 373 if (j >= bhi) { 374 break; 375 } 376 k = newj2len[j] = (j2len[j - 1] || 0) + 1; 377 if (k > bestsize) { 378 _ref3 = [i - k + 1, j - k + 1, k], besti = _ref3[0], bestj = _ref3[1], bestsize = _ref3[2]; 379 } 380 } 381 j2len = newj2len; 382 } 383 while (besti > alo && bestj > blo && !isbjunk(b[bestj - 1]) && a[besti - 1] === b[bestj - 1]) { 384 _ref4 = [besti - 1, bestj - 1, bestsize + 1], besti = _ref4[0], bestj = _ref4[1], bestsize = _ref4[2]; 385 } 386 while (besti + bestsize < ahi && bestj + bestsize < bhi && !isbjunk(b[bestj + bestsize]) && a[besti + bestsize] === b[bestj + bestsize]) { 387 bestsize++; 388 } 389 while (besti > alo && bestj > blo && isbjunk(b[bestj - 1]) && a[besti - 1] === b[bestj - 1]) { 390 _ref5 = [besti - 1, bestj - 1, bestsize + 1], besti = _ref5[0], bestj = _ref5[1], bestsize = _ref5[2]; 391 } 392 while (besti + bestsize < ahi && bestj + bestsize < bhi && isbjunk(b[bestj + bestsize]) && a[besti + bestsize] === b[bestj + bestsize]) { 393 bestsize++; 394 } 395 return [besti, bestj, bestsize]; 396 }; 397 398 SequenceMatcher.prototype.getMatchingBlocks = function() { 399 /* 400 Return list of triples describing matching subsequences. 401 402 Each triple is of the form [i, j, n], and means that 403 a[i...i+n] == b[j...j+n]. The triples are monotonically increasing in 404 i and in j. it's also guaranteed that if 405 [i, j, n] and [i', j', n'] are adjacent triples in the list, and 406 the second is not the last triple in the list, then i+n != i' or 407 j+n != j'. IOW, adjacent triples never describe adjacent equal 408 blocks. 409 410 The last triple is a dummy, [a.length, b.length, 0], and is the only 411 triple with n==0. 412 413 >>> s = new SequenceMatcher(null, 'abxcd', 'abcd') 414 >>> s.getMatchingBlocks() 415 [[0, 0, 2], [3, 2, 2], [5, 4, 0]] 416 */ 417 418 var ahi, alo, bhi, blo, i, i1, i2, j, j1, j2, k, k1, k2, la, lb, matchingBlocks, nonAdjacent, queue, x, _i, _len, _ref, _ref1, _ref2, _ref3, _ref4; 419 if (this.matchingBlocks) { 420 return this.matchingBlocks; 421 } 422 _ref = [this.a.length, this.b.length], la = _ref[0], lb = _ref[1]; 423 queue = [[0, la, 0, lb]]; 424 matchingBlocks = []; 425 while (queue.length) { 426 _ref1 = queue.pop(), alo = _ref1[0], ahi = _ref1[1], blo = _ref1[2], bhi = _ref1[3]; 427 _ref2 = x = this.findLongestMatch(alo, ahi, blo, bhi), i = _ref2[0], j = _ref2[1], k = _ref2[2]; 428 if (k) { 429 matchingBlocks.push(x); 430 if (alo < i && blo < j) { 431 queue.push([alo, i, blo, j]); 432 } 433 if (i + k < ahi && j + k < bhi) { 434 queue.push([i + k, ahi, j + k, bhi]); 435 } 436 } 437 } 438 matchingBlocks.sort(_arrayCmp); 439 i1 = j1 = k1 = 0; 440 nonAdjacent = []; 441 for (_i = 0, _len = matchingBlocks.length; _i < _len; _i++) { 442 _ref3 = matchingBlocks[_i], i2 = _ref3[0], j2 = _ref3[1], k2 = _ref3[2]; 443 if (i1 + k1 === i2 && j1 + k1 === j2) { 444 k1 += k2; 445 } else { 446 if (k1) { 447 nonAdjacent.push([i1, j1, k1]); 448 } 449 _ref4 = [i2, j2, k2], i1 = _ref4[0], j1 = _ref4[1], k1 = _ref4[2]; 450 } 451 } 452 if (k1) { 453 nonAdjacent.push([i1, j1, k1]); 454 } 455 nonAdjacent.push([la, lb, 0]); 456 return this.matchingBlocks = nonAdjacent; 457 }; 458 459 SequenceMatcher.prototype.getOpcodes = function() { 460 /* 461 Return list of 5-tuples describing how to turn a into b. 462 463 Each tuple is of the form [tag, i1, i2, j1, j2]. The first tuple 464 has i1 == j1 == 0, and remaining tuples have i1 == the i2 from the 465 tuple preceding it, and likewise for j1 == the previous j2. 466 467 The tags are strings, with these meanings: 468 469 'replace': a[i1...i2] should be replaced by b[j1...j2] 470 'delete': a[i1...i2] should be deleted. 471 Note that j1==j2 in this case. 472 'insert': b[j1...j2] should be inserted at a[i1...i1]. 473 Note that i1==i2 in this case. 474 'equal': a[i1...i2] == b[j1...j2] 475 476 >>> s = new SequenceMatcher(null, 'qabxcd', 'abycdf') 477 >>> s.getOpcodes() 478 [ [ 'delete' , 0 , 1 , 0 , 0 ] , 479 [ 'equal' , 1 , 3 , 0 , 2 ] , 480 [ 'replace' , 3 , 4 , 2 , 3 ] , 481 [ 'equal' , 4 , 6 , 3 , 5 ] , 482 [ 'insert' , 6 , 6 , 5 , 6 ] ] 483 */ 484 485 var ai, answer, bj, i, j, size, tag, _i, _len, _ref, _ref1, _ref2; 486 if (this.opcodes) { 487 return this.opcodes; 488 } 489 i = j = 0; 490 this.opcodes = answer = []; 491 _ref = this.getMatchingBlocks(); 492 for (_i = 0, _len = _ref.length; _i < _len; _i++) { 493 _ref1 = _ref[_i], ai = _ref1[0], bj = _ref1[1], size = _ref1[2]; 494 tag = ''; 495 if (i < ai && j < bj) { 496 tag = 'replace'; 497 } else if (i < ai) { 498 tag = 'delete'; 499 } else if (j < bj) { 500 tag = 'insert'; 501 } 502 if (tag) { 503 answer.push([tag, i, ai, j, bj]); 504 } 505 _ref2 = [ai + size, bj + size], i = _ref2[0], j = _ref2[1]; 506 if (size) { 507 answer.push(['equal', ai, i, bj, j]); 508 } 509 } 510 return answer; 511 }; 512 513 SequenceMatcher.prototype.getGroupedOpcodes = function(n) { 514 var codes, group, groups, i1, i2, j1, j2, nn, tag, _i, _len, _ref, _ref1, _ref2, _ref3; 515 if (n == null) { 516 n = 3; 517 } 518 /* 519 Isolate change clusters by eliminating ranges with no changes. 520 521 Return a list groups with upto n lines of context. 522 Each group is in the same format as returned by get_opcodes(). 523 524 >>> a = [1...40].map(String) 525 >>> b = a.slice() 526 >>> b[8...8] = 'i' 527 >>> b[20] += 'x' 528 >>> b[23...28] = [] 529 >>> b[30] += 'y' 530 >>> s = new SequenceMatcher(null, a, b) 531 >>> s.getGroupedOpcodes() 532 [ [ [ 'equal' , 5 , 8 , 5 , 8 ], 533 [ 'insert' , 8 , 8 , 8 , 9 ], 534 [ 'equal' , 8 , 11 , 9 , 12 ] ], 535 [ [ 'equal' , 16 , 19 , 17 , 20 ], 536 [ 'replace' , 19 , 20 , 20 , 21 ], 537 [ 'equal' , 20 , 22 , 21 , 23 ], 538 [ 'delete' , 22 , 27 , 23 , 23 ], 539 [ 'equal' , 27 , 30 , 23 , 26 ] ], 540 [ [ 'equal' , 31 , 34 , 27 , 30 ], 541 [ 'replace' , 34 , 35 , 30 , 31 ], 542 [ 'equal' , 35 , 38 , 31 , 34 ] ] ] 543 */ 544 545 codes = this.getOpcodes(); 546 if (!codes.length) { 547 codes = [['equal', 0, 1, 0, 1]]; 548 } 549 if (codes[0][0] === 'equal') { 550 _ref = codes[0], tag = _ref[0], i1 = _ref[1], i2 = _ref[2], j1 = _ref[3], j2 = _ref[4]; 551 codes[0] = [tag, max(i1, i2 - n), i2, max(j1, j2 - n), j2]; 552 } 553 if (codes[codes.length - 1][0] === 'equal') { 554 _ref1 = codes[codes.length - 1], tag = _ref1[0], i1 = _ref1[1], i2 = _ref1[2], j1 = _ref1[3], j2 = _ref1[4]; 555 codes[codes.length - 1] = [tag, i1, min(i2, i1 + n), j1, min(j2, j1 + n)]; 556 } 557 nn = n + n; 558 groups = []; 559 group = []; 560 for (_i = 0, _len = codes.length; _i < _len; _i++) { 561 _ref2 = codes[_i], tag = _ref2[0], i1 = _ref2[1], i2 = _ref2[2], j1 = _ref2[3], j2 = _ref2[4]; 562 if (tag === 'equal' && i2 - i1 > nn) { 563 group.push([tag, i1, min(i2, i1 + n), j1, min(j2, j1 + n)]); 564 groups.push(group); 565 group = []; 566 _ref3 = [max(i1, i2 - n), max(j1, j2 - n)], i1 = _ref3[0], j1 = _ref3[1]; 567 } 568 group.push([tag, i1, i2, j1, j2]); 569 } 570 if (group.length && !(group.length === 1 && group[0][0] === 'equal')) { 571 groups.push(group); 572 } 573 return groups; 574 }; 575 576 SequenceMatcher.prototype.ratio = function() { 577 /* 578 Return a measure of the sequences' similarity (float in [0,1]). 579 580 Where T is the total number of elements in both sequences, and 581 M is the number of matches, this is 2.0*M / T. 582 Note that this is 1 if the sequences are identical, and 0 if 583 they have nothing in common. 584 585 .ratio() is expensive to compute if you haven't already computed 586 .getMatchingBlocks() or .getOpcodes(), in which case you may 587 want to try .quickRatio() or .realQuickRatio() first to get an 588 upper bound. 589 590 >>> s = new SequenceMatcher(null, 'abcd', 'bcde') 591 >>> s.ratio() 592 0.75 593 >>> s.quickRatio() 594 0.75 595 >>> s.realQuickRatio() 596 1.0 597 */ 598 599 var match, matches, _i, _len, _ref; 600 matches = 0; 601 _ref = this.getMatchingBlocks(); 602 for (_i = 0, _len = _ref.length; _i < _len; _i++) { 603 match = _ref[_i]; 604 matches += match[2]; 605 } 606 return _calculateRatio(matches, this.a.length + this.b.length); 607 }; 608 609 SequenceMatcher.prototype.quickRatio = function() { 610 /* 611 Return an upper bound on ratio() relatively quickly. 612 613 This isn't defined beyond that it is an upper bound on .ratio(), and 614 is faster to compute. 615 */ 616 617 var avail, elt, fullbcount, matches, numb, _i, _j, _len, _len1, _ref, _ref1; 618 if (!this.fullbcount) { 619 this.fullbcount = fullbcount = {}; 620 _ref = this.b; 621 for (_i = 0, _len = _ref.length; _i < _len; _i++) { 622 elt = _ref[_i]; 623 fullbcount[elt] = (fullbcount[elt] || 0) + 1; 624 } 625 } 626 fullbcount = this.fullbcount; 627 avail = {}; 628 matches = 0; 629 _ref1 = this.a; 630 for (_j = 0, _len1 = _ref1.length; _j < _len1; _j++) { 631 elt = _ref1[_j]; 632 if (_has(avail, elt)) { 633 numb = avail[elt]; 634 } else { 635 numb = fullbcount[elt] || 0; 636 } 637 avail[elt] = numb - 1; 638 if (numb > 0) { 639 matches++; 640 } 641 } 642 return _calculateRatio(matches, this.a.length + this.b.length); 643 }; 644 645 SequenceMatcher.prototype.realQuickRatio = function() { 646 /* 647 Return an upper bound on ratio() very quickly. 648 649 This isn't defined beyond that it is an upper bound on .ratio(), and 650 is faster to compute than either .ratio() or .quickRatio(). 651 */ 652 653 var la, lb, _ref; 654 _ref = [this.a.length, this.b.length], la = _ref[0], lb = _ref[1]; 655 return _calculateRatio(min(la, lb), la + lb); 656 }; 657 658 return SequenceMatcher; 659 660 })(); 661 662 getCloseMatches = function(word, possibilities, n, cutoff) { 663 var result, s, score, x, _i, _j, _len, _len1, _ref, _results; 664 if (n == null) { 665 n = 3; 666 } 667 if (cutoff == null) { 668 cutoff = 0.6; 669 } 670 /* 671 Use SequenceMatcher to return list of the best "good enough" matches. 672 673 word is a sequence for which close matches are desired (typically a 674 string). 675 676 possibilities is a list of sequences against which to match word 677 (typically a list of strings). 678 679 Optional arg n (default 3) is the maximum number of close matches to 680 return. n must be > 0. 681 682 Optional arg cutoff (default 0.6) is a float in [0, 1]. Possibilities 683 that don't score at least that similar to word are ignored. 684 685 The best (no more than n) matches among the possibilities are returned 686 in a list, sorted by similarity score, most similar first. 687 688 >>> getCloseMatches('appel', ['ape', 'apple', 'peach', 'puppy']) 689 ['apple', 'ape'] 690 >>> KEYWORDS = require('coffee-script').RESERVED 691 >>> getCloseMatches('wheel', KEYWORDS) 692 ['when', 'while'] 693 >>> getCloseMatches('accost', KEYWORDS) 694 ['const'] 695 */ 696 697 if (!(n > 0)) { 698 throw new Error("n must be > 0: (" + n + ")"); 699 } 700 if (!((0.0 <= cutoff && cutoff <= 1.0))) { 701 throw new Error("cutoff must be in [0.0, 1.0]: (" + cutoff + ")"); 702 } 703 result = []; 704 s = new SequenceMatcher(); 705 s.setSeq2(word); 706 for (_i = 0, _len = possibilities.length; _i < _len; _i++) { 707 x = possibilities[_i]; 708 s.setSeq1(x); 709 if (s.realQuickRatio() >= cutoff && s.quickRatio() >= cutoff && s.ratio() >= cutoff) { 710 result.push([s.ratio(), x]); 711 } 712 } 713 result = Heap.nlargest(result, n, _arrayCmp); 714 _results = []; 715 for (_j = 0, _len1 = result.length; _j < _len1; _j++) { 716 _ref = result[_j], score = _ref[0], x = _ref[1]; 717 _results.push(x); 718 } 719 return _results; 720 }; 721 722 _countLeading = function(line, ch) { 723 /* 724 Return number of `ch` characters at the start of `line`. 725 726 >>> _countLeading(' abc', ' ') 727 3 728 */ 729 730 var i, n, _ref; 731 _ref = [0, line.length], i = _ref[0], n = _ref[1]; 732 while (i < n && line[i] === ch) { 733 i++; 734 } 735 return i; 736 }; 737 738 Differ = (function() { 739 740 Differ.name = 'Differ'; 741 742 /* 743 Differ is a class for comparing sequences of lines of text, and 744 producing human-readable differences or deltas. Differ uses 745 SequenceMatcher both to compare sequences of lines, and to compare 746 sequences of characters within similar (near-matching) lines. 747 748 Each line of a Differ delta begins with a two-letter code: 749 750 '- ' line unique to sequence 1 751 '+ ' line unique to sequence 2 752 ' ' line common to both sequences 753 '? ' line not present in either input sequence 754 755 Lines beginning with '? ' attempt to guide the eye to intraline 756 differences, and were not present in either input sequence. These lines 757 can be confusing if the sequences contain tab characters. 758 759 Note that Differ makes no claim to produce a *minimal* diff. To the 760 contrary, minimal diffs are often counter-intuitive, because they synch 761 up anywhere possible, sometimes accidental matches 100 pages apart. 762 Restricting synch points to contiguous matches preserves some notion of 763 locality, at the occasional cost of producing a longer diff. 764 765 Example: Comparing two texts. 766 767 >>> text1 = ['1. Beautiful is better than ugly.\n', 768 ... '2. Explicit is better than implicit.\n', 769 ... '3. Simple is better than complex.\n', 770 ... '4. Complex is better than complicated.\n'] 771 >>> text1.length 772 4 773 >>> text2 = ['1. Beautiful is better than ugly.\n', 774 ... '3. Simple is better than complex.\n', 775 ... '4. Complicated is better than complex.\n', 776 ... '5. Flat is better than nested.\n'] 777 778 Next we instantiate a Differ object: 779 780 >>> d = new Differ() 781 782 Note that when instantiating a Differ object we may pass functions to 783 filter out line and character 'junk'. 784 785 Finally, we compare the two: 786 787 >>> result = d.compare(text1, text2) 788 [ ' 1. Beautiful is better than ugly.\n', 789 '- 2. Explicit is better than implicit.\n', 790 '- 3. Simple is better than complex.\n', 791 '+ 3. Simple is better than complex.\n', 792 '? ++\n', 793 '- 4. Complex is better than complicated.\n', 794 '? ^ ---- ^\n', 795 '+ 4. Complicated is better than complex.\n', 796 '? ++++ ^ ^\n', 797 '+ 5. Flat is better than nested.\n' ] 798 799 Methods: 800 801 constructor(linejunk=null, charjunk=null) 802 Construct a text differencer, with optional filters. 803 compare(a, b) 804 Compare two sequences of lines; generate the resulting delta. 805 */ 806 807 808 function Differ(linejunk, charjunk) { 809 this.linejunk = linejunk; 810 this.charjunk = charjunk; 811 /* 812 Construct a text differencer, with optional filters. 813 814 The two optional keyword parameters are for filter functions: 815 816 - `linejunk`: A function that should accept a single string argument, 817 and return true iff the string is junk. The module-level function 818 `IS_LINE_JUNK` may be used to filter out lines without visible 819 characters, except for at most one splat ('#'). It is recommended 820 to leave linejunk null. 821 822 - `charjunk`: A function that should accept a string of length 1. The 823 module-level function `IS_CHARACTER_JUNK` may be used to filter out 824 whitespace characters (a blank or tab; **note**: bad idea to include 825 newline in this!). Use of IS_CHARACTER_JUNK is recommended. 826 */ 827 828 } 829 830 Differ.prototype.compare = function(a, b) { 831 /* 832 Compare two sequences of lines; generate the resulting delta. 833 834 Each sequence must contain individual single-line strings ending with 835 newlines. Such sequences can be obtained from the `readlines()` method 836 of file-like objects. The delta generated also consists of newline- 837 terminated strings, ready to be printed as-is via the writeline() 838 method of a file-like object. 839 840 Example: 841 842 >>> d = new Differ 843 >>> d.compare(['one\n', 'two\n', 'three\n'], 844 ... ['ore\n', 'tree\n', 'emu\n']) 845 [ '- one\n', 846 '? ^\n', 847 '+ ore\n', 848 '? ^\n', 849 '- two\n', 850 '- three\n', 851 '? -\n', 852 '+ tree\n', 853 '+ emu\n' ] 854 */ 855 856 var ahi, alo, bhi, blo, cruncher, g, line, lines, tag, _i, _j, _len, _len1, _ref, _ref1; 857 cruncher = new SequenceMatcher(this.linejunk, a, b); 858 lines = []; 859 _ref = cruncher.getOpcodes(); 860 for (_i = 0, _len = _ref.length; _i < _len; _i++) { 861 _ref1 = _ref[_i], tag = _ref1[0], alo = _ref1[1], ahi = _ref1[2], blo = _ref1[3], bhi = _ref1[4]; 862 switch (tag) { 863 case 'replace': 864 g = this._fancyReplace(a, alo, ahi, b, blo, bhi); 865 break; 866 case 'delete': 867 g = this._dump('-', a, alo, ahi); 868 break; 869 case 'insert': 870 g = this._dump('+', b, blo, bhi); 871 break; 872 case 'equal': 873 g = this._dump(' ', a, alo, ahi); 874 break; 875 default: 876 throw new Error("unknow tag (" + tag + ")"); 877 } 878 for (_j = 0, _len1 = g.length; _j < _len1; _j++) { 879 line = g[_j]; 880 lines.push(line); 881 } 882 } 883 return lines; 884 }; 885 886 Differ.prototype._dump = function(tag, x, lo, hi) { 887 /* 888 Generate comparison results for a same-tagged range. 889 */ 890 891 var i, _i, _results; 892 _results = []; 893 for (i = _i = lo; lo <= hi ? _i < hi : _i > hi; i = lo <= hi ? ++_i : --_i) { 894 _results.push("" + tag + " " + x[i]); 895 } 896 return _results; 897 }; 898 899 Differ.prototype._plainReplace = function(a, alo, ahi, b, blo, bhi) { 900 var first, g, line, lines, second, _i, _j, _len, _len1, _ref; 901 assert(alo < ahi && blo < bhi); 902 if (bhi - blo < ahi - alo) { 903 first = this._dump('+', b, blo, bhi); 904 second = this._dump('-', a, alo, ahi); 905 } else { 906 first = this._dump('-', a, alo, ahi); 907 second = this._dump('+', b, blo, bhi); 908 } 909 lines = []; 910 _ref = [first, second]; 911 for (_i = 0, _len = _ref.length; _i < _len; _i++) { 912 g = _ref[_i]; 913 for (_j = 0, _len1 = g.length; _j < _len1; _j++) { 914 line = g[_j]; 915 lines.push(line); 916 } 917 } 918 return lines; 919 }; 920 921 Differ.prototype._fancyReplace = function(a, alo, ahi, b, blo, bhi) { 922 /* 923 When replacing one block of lines with another, search the blocks 924 for *similar* lines; the best-matching pair (if any) is used as a 925 synch point, and intraline difference marking is done on the 926 similar pair. Lots of work, but often worth it. 927 928 Example: 929 >>> d = new Differ 930 >>> d._fancyReplace(['abcDefghiJkl\n'], 0, 1, 931 ... ['abcdefGhijkl\n'], 0, 1) 932 [ '- abcDefghiJkl\n', 933 '? ^ ^ ^\n', 934 '+ abcdefGhijkl\n', 935 '? ^ ^ ^\n' ] 936 */ 937 938 var aelt, ai, ai1, ai2, atags, belt, bestRatio, besti, bestj, bj, bj1, bj2, btags, cruncher, cutoff, eqi, eqj, i, j, la, lb, line, lines, tag, _i, _j, _k, _l, _len, _len1, _len2, _len3, _len4, _m, _n, _o, _ref, _ref1, _ref10, _ref11, _ref12, _ref2, _ref3, _ref4, _ref5, _ref6, _ref7, _ref8, _ref9; 939 _ref = [0.74, 0.75], bestRatio = _ref[0], cutoff = _ref[1]; 940 cruncher = new SequenceMatcher(this.charjunk); 941 _ref1 = [null, null], eqi = _ref1[0], eqj = _ref1[1]; 942 lines = []; 943 for (j = _i = blo; blo <= bhi ? _i < bhi : _i > bhi; j = blo <= bhi ? ++_i : --_i) { 944 bj = b[j]; 945 cruncher.setSeq2(bj); 946 for (i = _j = alo; alo <= ahi ? _j < ahi : _j > ahi; i = alo <= ahi ? ++_j : --_j) { 947 ai = a[i]; 948 if (ai === bj) { 949 if (eqi === null) { 950 _ref2 = [i, j], eqi = _ref2[0], eqj = _ref2[1]; 951 } 952 continue; 953 } 954 cruncher.setSeq1(ai); 955 if (cruncher.realQuickRatio() > bestRatio && cruncher.quickRatio() > bestRatio && cruncher.ratio() > bestRatio) { 956 _ref3 = [cruncher.ratio(), i, j], bestRatio = _ref3[0], besti = _ref3[1], bestj = _ref3[2]; 957 } 958 } 959 } 960 if (bestRatio < cutoff) { 961 if (eqi === null) { 962 _ref4 = this._plainReplace(a, alo, ahi, b, blo, bhi); 963 for (_k = 0, _len = _ref4.length; _k < _len; _k++) { 964 line = _ref4[_k]; 965 lines.push(line); 966 } 967 return lines; 968 } 969 _ref5 = [eqi, eqj, 1.0], besti = _ref5[0], bestj = _ref5[1], bestRatio = _ref5[2]; 970 } else { 971 eqi = null; 972 } 973 _ref6 = this._fancyHelper(a, alo, besti, b, blo, bestj); 974 for (_l = 0, _len1 = _ref6.length; _l < _len1; _l++) { 975 line = _ref6[_l]; 976 lines.push(line); 977 } 978 _ref7 = [a[besti], b[bestj]], aelt = _ref7[0], belt = _ref7[1]; 979 if (eqi === null) { 980 atags = btags = ''; 981 cruncher.setSeqs(aelt, belt); 982 _ref8 = cruncher.getOpcodes(); 983 for (_m = 0, _len2 = _ref8.length; _m < _len2; _m++) { 984 _ref9 = _ref8[_m], tag = _ref9[0], ai1 = _ref9[1], ai2 = _ref9[2], bj1 = _ref9[3], bj2 = _ref9[4]; 985 _ref10 = [ai2 - ai1, bj2 - bj1], la = _ref10[0], lb = _ref10[1]; 986 switch (tag) { 987 case 'replace': 988 atags += Array(la + 1).join('^'); 989 btags += Array(lb + 1).join('^'); 990 break; 991 case 'delete': 992 atags += Array(la + 1).join('-'); 993 break; 994 case 'insert': 995 btags += Array(lb + 1).join('+'); 996 break; 997 case 'equal': 998 atags += Array(la + 1).join(' '); 999 btags += Array(lb + 1).join(' '); 1000 break; 1001 default: 1002 throw new Error("unknow tag (" + tag + ")"); 1003 } 1004 } 1005 _ref11 = this._qformat(aelt, belt, atags, btags); 1006 for (_n = 0, _len3 = _ref11.length; _n < _len3; _n++) { 1007 line = _ref11[_n]; 1008 lines.push(line); 1009 } 1010 } else { 1011 lines.push(' ' + aelt); 1012 } 1013 _ref12 = this._fancyHelper(a, besti + 1, ahi, b, bestj + 1, bhi); 1014 for (_o = 0, _len4 = _ref12.length; _o < _len4; _o++) { 1015 line = _ref12[_o]; 1016 lines.push(line); 1017 } 1018 return lines; 1019 }; 1020 1021 Differ.prototype._fancyHelper = function(a, alo, ahi, b, blo, bhi) { 1022 var g; 1023 g = []; 1024 if (alo < ahi) { 1025 if (blo < bhi) { 1026 g = this._fancyReplace(a, alo, ahi, b, blo, bhi); 1027 } else { 1028 g = this._dump('-', a, alo, ahi); 1029 } 1030 } else if (blo < bhi) { 1031 g = this._dump('+', b, blo, bhi); 1032 } 1033 return g; 1034 }; 1035 1036 Differ.prototype._qformat = function(aline, bline, atags, btags) { 1037 /* 1038 Format "?" output and deal with leading tabs. 1039 1040 Example: 1041 1042 >>> d = new Differ 1043 >>> d._qformat('\tabcDefghiJkl\n', '\tabcdefGhijkl\n', 1044 [ '- \tabcDefghiJkl\n', 1045 '? \t ^ ^ ^\n', 1046 '+ \tabcdefGhijkl\n', 1047 '? \t ^ ^ ^\n' ] 1048 */ 1049 1050 var common, lines; 1051 lines = []; 1052 common = min(_countLeading(aline, '\t'), _countLeading(bline, '\t')); 1053 common = min(common, _countLeading(atags.slice(0, common), ' ')); 1054 common = min(common, _countLeading(btags.slice(0, common), ' ')); 1055 atags = atags.slice(common).replace(/\s+$/, ''); 1056 btags = btags.slice(common).replace(/\s+$/, ''); 1057 lines.push('- ' + aline); 1058 if (atags.length) { 1059 lines.push("? " + (Array(common + 1).join('\t')) + atags + "\n"); 1060 } 1061 lines.push('+ ' + bline); 1062 if (btags.length) { 1063 lines.push("? " + (Array(common + 1).join('\t')) + btags + "\n"); 1064 } 1065 return lines; 1066 }; 1067 1068 return Differ; 1069 1070 })(); 1071 1072 IS_LINE_JUNK = function(line, pat) { 1073 if (pat == null) { 1074 pat = /^\s*#?\s*$/; 1075 } 1076 /* 1077 Return 1 for ignorable line: iff `line` is blank or contains a single '#'. 1078 1079 Examples: 1080 1081 >>> IS_LINE_JUNK('\n') 1082 true 1083 >>> IS_LINE_JUNK(' # \n') 1084 true 1085 >>> IS_LINE_JUNK('hello\n') 1086 false 1087 */ 1088 1089 return pat.test(line); 1090 }; 1091 1092 IS_CHARACTER_JUNK = function(ch, ws) { 1093 if (ws == null) { 1094 ws = ' \t'; 1095 } 1096 /* 1097 Return 1 for ignorable character: iff `ch` is a space or tab. 1098 1099 Examples: 1100 >>> IS_CHARACTER_JUNK(' ').should.be.true 1101 true 1102 >>> IS_CHARACTER_JUNK('\t').should.be.true 1103 true 1104 >>> IS_CHARACTER_JUNK('\n').should.be.false 1105 false 1106 >>> IS_CHARACTER_JUNK('x').should.be.false 1107 false 1108 */ 1109 1110 return __indexOf.call(ws, ch) >= 0; 1111 }; 1112 1113 _formatRangeUnified = function(start, stop) { 1114 /* 1115 Convert range to the "ed" format' 1116 */ 1117 1118 var beginning, length; 1119 beginning = start + 1; 1120 length = stop - start; 1121 if (length === 1) { 1122 return "" + beginning; 1123 } 1124 if (!length) { 1125 beginning--; 1126 } 1127 return "" + beginning + "," + length; 1128 }; 1129 1130 unifiedDiff = function(a, b, _arg) { 1131 var file1Range, file2Range, first, fromdate, fromfile, fromfiledate, group, i1, i2, j1, j2, last, line, lines, lineterm, n, started, tag, todate, tofile, tofiledate, _i, _j, _k, _l, _len, _len1, _len2, _len3, _len4, _m, _ref, _ref1, _ref2, _ref3, _ref4, _ref5, _ref6; 1132 _ref = _arg != null ? _arg : {}, fromfile = _ref.fromfile, tofile = _ref.tofile, fromfiledate = _ref.fromfiledate, tofiledate = _ref.tofiledate, n = _ref.n, lineterm = _ref.lineterm; 1133 /* 1134 Compare two sequences of lines; generate the delta as a unified diff. 1135 1136 Unified diffs are a compact way of showing line changes and a few 1137 lines of context. The number of context lines is set by 'n' which 1138 defaults to three. 1139 1140 By default, the diff control lines (those with ---, +++, or @@) are 1141 created with a trailing newline. 1142 1143 For inputs that do not have trailing newlines, set the lineterm 1144 argument to "" so that the output will be uniformly newline free. 1145 1146 The unidiff format normally has a header for filenames and modification 1147 times. Any or all of these may be specified using strings for 1148 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'. 1149 The modification times are normally expressed in the ISO 8601 format. 1150 1151 Example: 1152 1153 >>> unifiedDiff('one two three four'.split(' '), 1154 ... 'zero one tree four'.split(' '), { 1155 ... fromfile: 'Original' 1156 ... tofile: 'Current', 1157 ... fromfiledate: '2005-01-26 23:30:50', 1158 ... tofiledate: '2010-04-02 10:20:52', 1159 ... lineterm: '' 1160 ... }) 1161 [ '--- Original\t2005-01-26 23:30:50', 1162 '+++ Current\t2010-04-02 10:20:52', 1163 '@@ -1,4 +1,4 @@', 1164 '+zero', 1165 ' one', 1166 '-two', 1167 '-three', 1168 '+tree', 1169 ' four' ] 1170 */ 1171 1172 if (fromfile == null) { 1173 fromfile = ''; 1174 } 1175 if (tofile == null) { 1176 tofile = ''; 1177 } 1178 if (fromfiledate == null) { 1179 fromfiledate = ''; 1180 } 1181 if (tofiledate == null) { 1182 tofiledate = ''; 1183 } 1184 if (n == null) { 1185 n = 3; 1186 } 1187 if (lineterm == null) { 1188 lineterm = '\n'; 1189 } 1190 lines = []; 1191 started = false; 1192 _ref1 = (new SequenceMatcher(null, a, b)).getGroupedOpcodes(); 1193 for (_i = 0, _len = _ref1.length; _i < _len; _i++) { 1194 group = _ref1[_i]; 1195 if (!started) { 1196 started = true; 1197 fromdate = fromfiledate ? "\t" + fromfiledate : ''; 1198 todate = tofiledate ? "\t" + tofiledate : ''; 1199 lines.push("--- " + fromfile + fromdate + lineterm); 1200 lines.push("+++ " + tofile + todate + lineterm); 1201 } 1202 _ref2 = [group[0], group[group.length - 1]], first = _ref2[0], last = _ref2[1]; 1203 file1Range = _formatRangeUnified(first[1], last[2]); 1204 file2Range = _formatRangeUnified(first[3], last[4]); 1205 lines.push("@@ -" + file1Range + " +" + file2Range + " @@" + lineterm); 1206 for (_j = 0, _len1 = group.length; _j < _len1; _j++) { 1207 _ref3 = group[_j], tag = _ref3[0], i1 = _ref3[1], i2 = _ref3[2], j1 = _ref3[3], j2 = _ref3[4]; 1208 if (tag === 'equal') { 1209 _ref4 = a.slice(i1, i2); 1210 for (_k = 0, _len2 = _ref4.length; _k < _len2; _k++) { 1211 line = _ref4[_k]; 1212 lines.push(' ' + line); 1213 } 1214 continue; 1215 } 1216 if (tag === 'replace' || tag === 'delete') { 1217 _ref5 = a.slice(i1, i2); 1218 for (_l = 0, _len3 = _ref5.length; _l < _len3; _l++) { 1219 line = _ref5[_l]; 1220 lines.push('-' + line); 1221 } 1222 } 1223 if (tag === 'replace' || tag === 'insert') { 1224 _ref6 = b.slice(j1, j2); 1225 for (_m = 0, _len4 = _ref6.length; _m < _len4; _m++) { 1226 line = _ref6[_m]; 1227 lines.push('+' + line); 1228 } 1229 } 1230 } 1231 } 1232 return lines; 1233 }; 1234 1235 _formatRangeContext = function(start, stop) { 1236 /* 1237 Convert range to the "ed" format' 1238 */ 1239 1240 var beginning, length; 1241 beginning = start + 1; 1242 length = stop - start; 1243 if (!length) { 1244 beginning--; 1245 } 1246 if (length <= 1) { 1247 return "" + beginning; 1248 } 1249 return "" + beginning + "," + (beginning + length - 1); 1250 }; 1251 1252 contextDiff = function(a, b, _arg) { 1253 var file1Range, file2Range, first, fromdate, fromfile, fromfiledate, group, i1, i2, j1, j2, last, line, lines, lineterm, n, prefix, started, tag, todate, tofile, tofiledate, _, _i, _j, _k, _l, _len, _len1, _len2, _len3, _len4, _m, _ref, _ref1, _ref2, _ref3, _ref4, _ref5, _ref6; 1254 _ref = _arg != null ? _arg : {}, fromfile = _ref.fromfile, tofile = _ref.tofile, fromfiledate = _ref.fromfiledate, tofiledate = _ref.tofiledate, n = _ref.n, lineterm = _ref.lineterm; 1255 /* 1256 Compare two sequences of lines; generate the delta as a context diff. 1257 1258 Context diffs are a compact way of showing line changes and a few 1259 lines of context. The number of context lines is set by 'n' which 1260 defaults to three. 1261 1262 By default, the diff control lines (those with *** or ---) are 1263 created with a trailing newline. This is helpful so that inputs 1264 created from file.readlines() result in diffs that are suitable for 1265 file.writelines() since both the inputs and outputs have trailing 1266 newlines. 1267 1268 For inputs that do not have trailing newlines, set the lineterm 1269 argument to "" so that the output will be uniformly newline free. 1270 1271 The context diff format normally has a header for filenames and 1272 modification times. Any or all of these may be specified using 1273 strings for 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'. 1274 The modification times are normally expressed in the ISO 8601 format. 1275 If not specified, the strings default to blanks. 1276 1277 Example: 1278 >>> a = ['one\n', 'two\n', 'three\n', 'four\n'] 1279 >>> b = ['zero\n', 'one\n', 'tree\n', 'four\n'] 1280 >>> contextDiff(a, b, {fromfile: 'Original', tofile: 'Current'}) 1281 [ '*** Original\n', 1282 '--- Current\n', 1283 '***************\n', 1284 '*** 1,4 ****\n', 1285 ' one\n', 1286 '! two\n', 1287 '! three\n', 1288 ' four\n', 1289 '--- 1,4 ----\n', 1290 '+ zero\n', 1291 ' one\n', 1292 '! tree\n', 1293 ' four\n' ] 1294 */ 1295 1296 if (fromfile == null) { 1297 fromfile = ''; 1298 } 1299 if (tofile == null) { 1300 tofile = ''; 1301 } 1302 if (fromfiledate == null) { 1303 fromfiledate = ''; 1304 } 1305 if (tofiledate == null) { 1306 tofiledate = ''; 1307 } 1308 if (n == null) { 1309 n = 3; 1310 } 1311 if (lineterm == null) { 1312 lineterm = '\n'; 1313 } 1314 prefix = { 1315 insert: '+ ', 1316 "delete": '- ', 1317 replace: '! ', 1318 equal: ' ' 1319 }; 1320 started = false; 1321 lines = []; 1322 _ref1 = (new SequenceMatcher(null, a, b)).getGroupedOpcodes(); 1323 for (_i = 0, _len = _ref1.length; _i < _len; _i++) { 1324 group = _ref1[_i]; 1325 if (!started) { 1326 started = true; 1327 fromdate = fromfiledate ? "\t" + fromfiledate : ''; 1328 todate = tofiledate ? "\t" + tofiledate : ''; 1329 lines.push("*** " + fromfile + fromdate + lineterm); 1330 lines.push("--- " + tofile + todate + lineterm); 1331 _ref2 = [group[0], group[group.length - 1]], first = _ref2[0], last = _ref2[1]; 1332 lines.push('***************' + lineterm); 1333 file1Range = _formatRangeContext(first[1], last[2]); 1334 lines.push("*** " + file1Range + " ****" + lineterm); 1335 if (_any((function() { 1336 var _j, _len1, _ref3, _results; 1337 _results = []; 1338 for (_j = 0, _len1 = group.length; _j < _len1; _j++) { 1339 _ref3 = group[_j], tag = _ref3[0], _ = _ref3[1], _ = _ref3[2], _ = _ref3[3], _ = _ref3[4]; 1340 _results.push(tag === 'replace' || tag === 'delete'); 1341 } 1342 return _results; 1343 })())) { 1344 for (_j = 0, _len1 = group.length; _j < _len1; _j++) { 1345 _ref3 = group[_j], tag = _ref3[0], i1 = _ref3[1], i2 = _ref3[2], _ = _ref3[3], _ = _ref3[4]; 1346 if (tag !== 'insert') { 1347 _ref4 = a.slice(i1, i2); 1348 for (_k = 0, _len2 = _ref4.length; _k < _len2; _k++) { 1349 line = _ref4[_k]; 1350 lines.push(prefix[tag] + line); 1351 } 1352 } 1353 } 1354 } 1355 file2Range = _formatRangeContext(first[3], last[4]); 1356 lines.push("--- " + file2Range + " ----" + lineterm); 1357 if (_any((function() { 1358 var _l, _len3, _ref5, _results; 1359 _results = []; 1360 for (_l = 0, _len3 = group.length; _l < _len3; _l++) { 1361 _ref5 = group[_l], tag = _ref5[0], _ = _ref5[1], _ = _ref5[2], _ = _ref5[3], _ = _ref5[4]; 1362 _results.push(tag === 'replace' || tag === 'insert'); 1363 } 1364 return _results; 1365 })())) { 1366 for (_l = 0, _len3 = group.length; _l < _len3; _l++) { 1367 _ref5 = group[_l], tag = _ref5[0], _ = _ref5[1], _ = _ref5[2], j1 = _ref5[3], j2 = _ref5[4]; 1368 if (tag !== 'delete') { 1369 _ref6 = b.slice(j1, j2); 1370 for (_m = 0, _len4 = _ref6.length; _m < _len4; _m++) { 1371 line = _ref6[_m]; 1372 lines.push(prefix[tag] + line); 1373 } 1374 } 1375 } 1376 } 1377 } 1378 } 1379 return lines; 1380 }; 1381 1382 ndiff = function(a, b, linejunk, charjunk) { 1383 if (charjunk == null) { 1384 charjunk = IS_CHARACTER_JUNK; 1385 } 1386 /* 1387 Compare `a` and `b` (lists of strings); return a `Differ`-style delta. 1388 1389 Optional keyword parameters `linejunk` and `charjunk` are for filter 1390 functions (or None): 1391 1392 - linejunk: A function that should accept a single string argument, and 1393 return true iff the string is junk. The default is null, and is 1394 recommended; 1395 1396 - charjunk: A function that should accept a string of length 1. The 1397 default is module-level function IS_CHARACTER_JUNK, which filters out 1398 whitespace characters (a blank or tab; note: bad idea to include newline 1399 in this!). 1400 1401 Example: 1402 >>> a = ['one\n', 'two\n', 'three\n'] 1403 >>> b = ['ore\n', 'tree\n', 'emu\n'] 1404 >>> ndiff(a, b) 1405 [ '- one\n', 1406 '? ^\n', 1407 '+ ore\n', 1408 '? ^\n', 1409 '- two\n', 1410 '- three\n', 1411 '? -\n', 1412 '+ tree\n', 1413 '+ emu\n' ] 1414 */ 1415 1416 return (new Differ(linejunk, charjunk)).compare(a, b); 1417 }; 1418 1419 restore = function(delta, which) { 1420 /* 1421 Generate one of the two sequences that generated a delta. 1422 1423 Given a `delta` produced by `Differ.compare()` or `ndiff()`, extract 1424 lines originating from file 1 or 2 (parameter `which`), stripping off line 1425 prefixes. 1426 1427 Examples: 1428 >>> a = ['one\n', 'two\n', 'three\n'] 1429 >>> b = ['ore\n', 'tree\n', 'emu\n'] 1430 >>> diff = ndiff(a, b) 1431 >>> restore(diff, 1) 1432 [ 'one\n', 1433 'two\n', 1434 'three\n' ] 1435 >>> restore(diff, 2) 1436 [ 'ore\n', 1437 'tree\n', 1438 'emu\n' ] 1439 */ 1440 1441 var line, lines, prefixes, tag, _i, _len, _ref; 1442 tag = { 1443 1: '- ', 1444 2: '+ ' 1445 }[which]; 1446 if (!tag) { 1447 throw new Error("unknow delta choice (must be 1 or 2): " + which); 1448 } 1449 prefixes = [' ', tag]; 1450 lines = []; 1451 for (_i = 0, _len = delta.length; _i < _len; _i++) { 1452 line = delta[_i]; 1453 if (_ref = line.slice(0, 2), __indexOf.call(prefixes, _ref) >= 0) { 1454 lines.push(line.slice(2)); 1455 } 1456 } 1457 return lines; 1458 }; 1459 1460 exports._arrayCmp = _arrayCmp; 1461 1462 exports.SequenceMatcher = SequenceMatcher; 1463 1464 exports.getCloseMatches = getCloseMatches; 1465 1466 exports._countLeading = _countLeading; 1467 1468 exports.Differ = Differ; 1469 1470 exports.IS_LINE_JUNK = IS_LINE_JUNK; 1471 1472 exports.IS_CHARACTER_JUNK = IS_CHARACTER_JUNK; 1473 1474 exports._formatRangeUnified = _formatRangeUnified; 1475 1476 exports.unifiedDiff = unifiedDiff; 1477 1478 exports._formatRangeContext = _formatRangeContext; 1479 1480 exports.contextDiff = contextDiff; 1481 1482 exports.ndiff = ndiff; 1483 1484 exports.restore = restore; 1485 1486 }).call(this);