indexOf.js
  1  'use strict'
  2  
  3  const tape = require('tape')
  4  const BufferList = require('../')
  5  const { Buffer } = require('buffer')
  6  
  7  tape('indexOf single byte needle', (t) => {
  8    const bl = new BufferList(['abcdefg', 'abcdefg', '12345'])
  9  
 10    t.equal(bl.indexOf('e'), 4)
 11    t.equal(bl.indexOf('e', 5), 11)
 12    t.equal(bl.indexOf('e', 12), -1)
 13    t.equal(bl.indexOf('5'), 18)
 14  
 15    t.end()
 16  })
 17  
 18  tape('indexOf multiple byte needle', (t) => {
 19    const bl = new BufferList(['abcdefg', 'abcdefg'])
 20  
 21    t.equal(bl.indexOf('ef'), 4)
 22    t.equal(bl.indexOf('ef', 5), 11)
 23  
 24    t.end()
 25  })
 26  
 27  tape('indexOf multiple byte needles across buffer boundaries', (t) => {
 28    const bl = new BufferList(['abcdefg', 'abcdefg'])
 29  
 30    t.equal(bl.indexOf('fgabc'), 5)
 31  
 32    t.end()
 33  })
 34  
 35  tape('indexOf takes a Uint8Array search', (t) => {
 36    const bl = new BufferList(['abcdefg', 'abcdefg'])
 37    const search = new Uint8Array([102, 103, 97, 98, 99]) // fgabc
 38  
 39    t.equal(bl.indexOf(search), 5)
 40  
 41    t.end()
 42  })
 43  
 44  tape('indexOf takes a buffer list search', (t) => {
 45    const bl = new BufferList(['abcdefg', 'abcdefg'])
 46    const search = new BufferList('fgabc')
 47  
 48    t.equal(bl.indexOf(search), 5)
 49  
 50    t.end()
 51  })
 52  
 53  tape('indexOf a zero byte needle', (t) => {
 54    const b = new BufferList('abcdef')
 55    const bufEmpty = Buffer.from('')
 56  
 57    t.equal(b.indexOf(''), 0)
 58    t.equal(b.indexOf('', 1), 1)
 59    t.equal(b.indexOf('', b.length + 1), b.length)
 60    t.equal(b.indexOf('', Infinity), b.length)
 61    t.equal(b.indexOf(bufEmpty), 0)
 62    t.equal(b.indexOf(bufEmpty, 1), 1)
 63    t.equal(b.indexOf(bufEmpty, b.length + 1), b.length)
 64    t.equal(b.indexOf(bufEmpty, Infinity), b.length)
 65  
 66    t.end()
 67  })
 68  
 69  tape('indexOf buffers smaller and larger than the needle', (t) => {
 70    const bl = new BufferList(['abcdefg', 'a', 'bcdefg', 'a', 'bcfgab'])
 71  
 72    t.equal(bl.indexOf('fgabc'), 5)
 73    t.equal(bl.indexOf('fgabc', 6), 12)
 74    t.equal(bl.indexOf('fgabc', 13), -1)
 75  
 76    t.end()
 77  })
 78  
 79  // only present in node 6+
 80  ;(process.version.substr(1).split('.')[0] >= 6) && tape('indexOf latin1 and binary encoding', (t) => {
 81    const b = new BufferList('abcdef')
 82  
 83    // test latin1 encoding
 84    t.equal(
 85      new BufferList(Buffer.from(b.toString('latin1'), 'latin1'))
 86        .indexOf('d', 0, 'latin1'),
 87      3
 88    )
 89    t.equal(
 90      new BufferList(Buffer.from(b.toString('latin1'), 'latin1'))
 91        .indexOf(Buffer.from('d', 'latin1'), 0, 'latin1'),
 92      3
 93    )
 94    t.equal(
 95      new BufferList(Buffer.from('aa\u00e8aa', 'latin1'))
 96        .indexOf('\u00e8', 'latin1'),
 97      2
 98    )
 99    t.equal(
100      new BufferList(Buffer.from('\u00e8', 'latin1'))
101        .indexOf('\u00e8', 'latin1'),
102      0
103    )
104    t.equal(
105      new BufferList(Buffer.from('\u00e8', 'latin1'))
106        .indexOf(Buffer.from('\u00e8', 'latin1'), 'latin1'),
107      0
108    )
109  
110    // test binary encoding
111    t.equal(
112      new BufferList(Buffer.from(b.toString('binary'), 'binary'))
113        .indexOf('d', 0, 'binary'),
114      3
115    )
116    t.equal(
117      new BufferList(Buffer.from(b.toString('binary'), 'binary'))
118        .indexOf(Buffer.from('d', 'binary'), 0, 'binary'),
119      3
120    )
121    t.equal(
122      new BufferList(Buffer.from('aa\u00e8aa', 'binary'))
123        .indexOf('\u00e8', 'binary'),
124      2
125    )
126    t.equal(
127      new BufferList(Buffer.from('\u00e8', 'binary'))
128        .indexOf('\u00e8', 'binary'),
129      0
130    )
131    t.equal(
132      new BufferList(Buffer.from('\u00e8', 'binary'))
133        .indexOf(Buffer.from('\u00e8', 'binary'), 'binary'),
134      0
135    )
136  
137    t.end()
138  })
139  
// Per the test name, this is a port of the Node.js 10 Buffer#indexOf test
// suite, run against a BufferList holding 'abcdef'.
//
// NOTE(review): the hex/base64/ascii, ucs2, long-string and code-point
// sections call indexOf on plain Buffers rather than on a BufferList, so they
// exercise Node's own Buffer; they are kept as they were.
//
// Every assertion uses tape's (actual, expected[, message]) argument order so
// that failure output labels the two values correctly.
tape('indexOf the entire nodejs10 buffer test suite', (t) => {
  const b = new BufferList('abcdef')
  const bufA = Buffer.from('a')
  const bufBc = Buffer.from('bc')
  const bufF = Buffer.from('f')
  const bufZ = Buffer.from('z')

  const stringComparison = 'abcdef'

  // string needles
  t.equal(b.indexOf('a'), 0)
  t.equal(b.indexOf('a', 1), -1)
  t.equal(b.indexOf('a', -1), -1)
  t.equal(b.indexOf('a', -4), -1)
  t.equal(b.indexOf('a', -b.length), 0)
  t.equal(b.indexOf('a', NaN), 0)
  t.equal(b.indexOf('a', -Infinity), 0)
  t.equal(b.indexOf('a', Infinity), -1)
  t.equal(b.indexOf('bc'), 1)
  t.equal(b.indexOf('bc', 2), -1)
  t.equal(b.indexOf('bc', -1), -1)
  t.equal(b.indexOf('bc', -3), -1)
  t.equal(b.indexOf('bc', -5), 1)
  t.equal(b.indexOf('bc', NaN), 1)
  t.equal(b.indexOf('bc', -Infinity), 1)
  t.equal(b.indexOf('bc', Infinity), -1)
  t.equal(b.indexOf('f'), b.length - 1)
  t.equal(b.indexOf('z'), -1)

  // Buffer needles
  t.equal(b.indexOf(bufA), 0)
  t.equal(b.indexOf(bufA, 1), -1)
  t.equal(b.indexOf(bufA, -1), -1)
  t.equal(b.indexOf(bufA, -4), -1)
  t.equal(b.indexOf(bufA, -b.length), 0)
  t.equal(b.indexOf(bufA, NaN), 0)
  t.equal(b.indexOf(bufA, -Infinity), 0)
  t.equal(b.indexOf(bufA, Infinity), -1)
  t.equal(b.indexOf(bufBc), 1)
  t.equal(b.indexOf(bufBc, 2), -1)
  t.equal(b.indexOf(bufBc, -1), -1)
  t.equal(b.indexOf(bufBc, -3), -1)
  t.equal(b.indexOf(bufBc, -5), 1)
  t.equal(b.indexOf(bufBc, NaN), 1)
  t.equal(b.indexOf(bufBc, -Infinity), 1)
  t.equal(b.indexOf(bufBc, Infinity), -1)
  t.equal(b.indexOf(bufF), b.length - 1)
  t.equal(b.indexOf(bufZ), -1)

  // numeric (single byte value) needles; 0x61 is 'a'
  t.equal(b.indexOf(0x61), 0)
  t.equal(b.indexOf(0x61, 1), -1)
  t.equal(b.indexOf(0x61, -1), -1)
  t.equal(b.indexOf(0x61, -4), -1)
  t.equal(b.indexOf(0x61, -b.length), 0)
  t.equal(b.indexOf(0x61, NaN), 0)
  t.equal(b.indexOf(0x61, -Infinity), 0)
  t.equal(b.indexOf(0x61, Infinity), -1)
  t.equal(b.indexOf(0x0), -1)

  // test offsets
  t.equal(b.indexOf('d', 2), 3)
  t.equal(b.indexOf('f', 5), 5)
  t.equal(b.indexOf('f', -1), 5)
  t.equal(b.indexOf('f', 6), -1)

  t.equal(b.indexOf(Buffer.from('d'), 2), 3)
  t.equal(b.indexOf(Buffer.from('f'), 5), 5)
  t.equal(b.indexOf(Buffer.from('f'), -1), 5)
  t.equal(b.indexOf(Buffer.from('f'), 6), -1)

  t.equal(Buffer.from('ff').indexOf(Buffer.from('f'), 1, 'ucs2'), -1)

  // test invalid and uppercase encoding
  t.equal(b.indexOf('b', 'utf8'), 1)
  t.equal(b.indexOf('b', 'UTF8'), 1)
  t.equal(b.indexOf('62', 'HEX'), 1)
  t.throws(() => b.indexOf('bad', 'enc'), TypeError)

  // test hex encoding
  t.equal(
    Buffer.from(b.toString('hex'), 'hex')
      .indexOf('64', 0, 'hex'),
    3
  )
  t.equal(
    Buffer.from(b.toString('hex'), 'hex')
      .indexOf(Buffer.from('64', 'hex'), 0, 'hex'),
    3
  )

  // test base64 encoding
  t.equal(
    Buffer.from(b.toString('base64'), 'base64')
      .indexOf('ZA==', 0, 'base64'),
    3
  )
  t.equal(
    Buffer.from(b.toString('base64'), 'base64')
      .indexOf(Buffer.from('ZA==', 'base64'), 0, 'base64'),
    3
  )

  // test ascii encoding
  t.equal(
    Buffer.from(b.toString('ascii'), 'ascii')
      .indexOf('d', 0, 'ascii'),
    3
  )
  t.equal(
    Buffer.from(b.toString('ascii'), 'ascii')
      .indexOf(Buffer.from('d', 'ascii'), 0, 'ascii'),
    3
  )

  // test optional offset with passed encoding
  t.equal(Buffer.from('aaaa0').indexOf('30', 'hex'), 4)
  t.equal(Buffer.from('aaaa00a').indexOf('3030', 'hex'), 4)

  {
    // test ucs2 encoding
    const twoByteString = Buffer.from('\u039a\u0391\u03a3\u03a3\u0395', 'ucs2')

    t.equal(twoByteString.indexOf('\u0395', 4, 'ucs2'), 8)
    t.equal(twoByteString.indexOf('\u03a3', -4, 'ucs2'), 6)
    t.equal(twoByteString.indexOf('\u03a3', -6, 'ucs2'), 4)
    t.equal(
      twoByteString.indexOf(Buffer.from('\u03a3', 'ucs2'), -6, 'ucs2'), 4)
    t.equal(twoByteString.indexOf('\u03a3', -2, 'ucs2'), -1)
  }

  const mixedByteStringUcs2 =
      Buffer.from('\u039a\u0391abc\u03a3\u03a3\u0395', 'ucs2')

  t.equal(mixedByteStringUcs2.indexOf('bc', 0, 'ucs2'), 6)
  t.equal(mixedByteStringUcs2.indexOf('\u03a3', 0, 'ucs2'), 10)
  t.equal(mixedByteStringUcs2.indexOf('\u0396', 0, 'ucs2'), -1)

  t.equal(
    mixedByteStringUcs2.indexOf(Buffer.from('bc', 'ucs2'), 0, 'ucs2'), 6)
  t.equal(
    mixedByteStringUcs2.indexOf(Buffer.from('\u03a3', 'ucs2'), 0, 'ucs2'), 10)
  t.equal(
    mixedByteStringUcs2.indexOf(Buffer.from('\u0396', 'ucs2'), 0, 'ucs2'), -1)

  {
    const twoByteString = Buffer.from('\u039a\u0391\u03a3\u03a3\u0395', 'ucs2')

    // Test single char pattern
    t.equal(twoByteString.indexOf('\u039a', 0, 'ucs2'), 0)
    let index = twoByteString.indexOf('\u0391', 0, 'ucs2')
    t.equal(index, 2, `Alpha - at index ${index}`)
    index = twoByteString.indexOf('\u03a3', 0, 'ucs2')
    t.equal(index, 4, `First Sigma - at index ${index}`)
    index = twoByteString.indexOf('\u03a3', 6, 'ucs2')
    t.equal(index, 6, `Second Sigma - at index ${index}`)
    index = twoByteString.indexOf('\u0395', 0, 'ucs2')
    t.equal(index, 8, `Epsilon - at index ${index}`)
    index = twoByteString.indexOf('\u0392', 0, 'ucs2')
    t.equal(index, -1, `Not beta - at index ${index}`)

    // Test multi-char pattern
    index = twoByteString.indexOf('\u039a\u0391', 0, 'ucs2')
    t.equal(index, 0, `Lambda Alpha - at index ${index}`)
    index = twoByteString.indexOf('\u0391\u03a3', 0, 'ucs2')
    t.equal(index, 2, `Alpha Sigma - at index ${index}`)
    index = twoByteString.indexOf('\u03a3\u03a3', 0, 'ucs2')
    t.equal(index, 4, `Sigma Sigma - at index ${index}`)
    index = twoByteString.indexOf('\u03a3\u0395', 0, 'ucs2')
    t.equal(index, 6, `Sigma Epsilon - at index ${index}`)
  }

  const mixedByteStringUtf8 = Buffer.from('\u039a\u0391abc\u03a3\u03a3\u0395')

  t.equal(mixedByteStringUtf8.indexOf('bc'), 5)
  t.equal(mixedByteStringUtf8.indexOf('bc', 5), 5)
  t.equal(mixedByteStringUtf8.indexOf('bc', -8), 5)
  t.equal(mixedByteStringUtf8.indexOf('\u03a3'), 7)
  t.equal(mixedByteStringUtf8.indexOf('\u0396'), -1)

  // Test complex string indexOf algorithms. Only trigger for long strings.
  // Long string that isn't a simple repeat of a shorter string.
  let longString = 'A'
  for (let i = 66; i < 76; i++) { // from 'B' to 'K'
    longString = longString + String.fromCharCode(i) + longString
  }

  const longBufferString = Buffer.from(longString)

  // pattern of 15 chars, repeated every 16 chars in long
  let pattern = 'ABACABADABACABA'
  for (let i = 0; i < longBufferString.length - pattern.length; i += 7) {
    const index = longBufferString.indexOf(pattern, i)
    // Expected hit: i rounded up to the next multiple of 16.
    t.equal(index, (i + 15) & ~0xf,
      `Long ABACABA...-string at index ${i}`)
  }

  let index = longBufferString.indexOf('AJABACA')
  t.equal(index, 510, `Long AJABACA, First J - at index ${index}`)
  index = longBufferString.indexOf('AJABACA', 511)
  t.equal(index, 1534, `Long AJABACA, Second J - at index ${index}`)

  pattern = 'JABACABADABACABA'
  index = longBufferString.indexOf(pattern)
  t.equal(index, 511, `Long JABACABA..., First J - at index ${index}`)
  index = longBufferString.indexOf(pattern, 512)
  t.equal(
    index, 1535, `Long JABACABA..., Second J - at index ${index}`)

  // Search for a non-ASCII string in a pure ASCII string.
  const asciiString = Buffer.from(
    'somethingnotatallsinisterwhichalsoworks')
  t.equal(asciiString.indexOf('\x2061'), -1)
  t.equal(asciiString.indexOf('eth', 0), 3)

  // Search in string containing many non-ASCII chars.
  const allCodePoints = []
  for (let i = 0; i < 65536; i++) {
    allCodePoints[i] = i
  }

  const allCharsString = String.fromCharCode.apply(String, allCodePoints)
  const allCharsBufferUtf8 = Buffer.from(allCharsString)
  const allCharsBufferUcs2 = Buffer.from(allCharsString, 'ucs2')

  // Search for string long enough to trigger complex search with ASCII pattern
  // and UC16 subject.
  t.equal(allCharsBufferUtf8.indexOf('notfound'), -1)
  t.equal(allCharsBufferUcs2.indexOf('notfound'), -1)

  // Needle is longer than haystack, but only because it's encoded as UTF-16
  t.equal(Buffer.from('aaaa').indexOf('a'.repeat(4), 'ucs2'), -1)

  t.equal(Buffer.from('aaaa').indexOf('a'.repeat(4), 'utf8'), 0)
  t.equal(Buffer.from('aaaa').indexOf('你好', 'ucs2'), -1)

  // Haystack has odd length, but the needle is UCS2.
  t.equal(Buffer.from('aaaaa').indexOf('b', 'ucs2'), -1)

  {
    // Find substrings in Utf8.
    const lengths = [1, 3, 15] // Single char, simple and complex.
    const indices = [0x5, 0x60, 0x400, 0x680, 0x7ee, 0xFF02, 0x16610, 0x2f77b]
    for (const baseLength of lengths) {
      for (const index of indices) {
        let length = baseLength

        if (index + length > 0x7F) {
          length = 2 * length
        }

        if (index + length > 0x7FF) {
          length = 3 * length
        }

        if (index + length > 0xFFFF) {
          length = 4 * length
        }

        const patternBufferUtf8 = allCharsBufferUtf8.slice(index, index + length)
        t.equal(allCharsBufferUtf8.indexOf(patternBufferUtf8), index)

        const patternStringUtf8 = patternBufferUtf8.toString()
        t.equal(allCharsBufferUtf8.indexOf(patternStringUtf8), index)
      }
    }
  }

  {
    // Find substrings in Ucs2.
    const lengths = [2, 4, 16] // Single char, simple and complex.
    const indices = [0x5, 0x65, 0x105, 0x205, 0x285, 0x2005, 0x2085, 0xfff0]

    for (const length of lengths) {
      for (const charIndex of indices) {
        // The table holds character positions; double them for byte offsets.
        const index = charIndex * 2

        const patternBufferUcs2 =
            allCharsBufferUcs2.slice(index, index + length)
        t.equal(
          allCharsBufferUcs2.indexOf(patternBufferUcs2, 0, 'ucs2'), index)

        const patternStringUcs2 = patternBufferUcs2.toString('ucs2')
        t.equal(
          allCharsBufferUcs2.indexOf(patternStringUcs2, 0, 'ucs2'), index)
      }
    }
  }

  // Needles of unsupported types must be rejected with a TypeError.
  const invalidNeedles = [() => {}, {}, []]
  for (const val of invalidNeedles) {
    t.throws(() => b.indexOf(val), TypeError, `"${JSON.stringify(val)}" should throw`)
  }

  // Test weird offset arguments.
  // The following offsets coerce to NaN or 0, searching the whole Buffer
  t.equal(b.indexOf('b', undefined), 1)
  t.equal(b.indexOf('b', {}), 1)
  t.equal(b.indexOf('b', 0), 1)
  t.equal(b.indexOf('b', null), 1)
  t.equal(b.indexOf('b', []), 1)

  // The following offset coerces to 2, in other words +[2] === 2
  t.equal(b.indexOf('b', [2]), -1)

  // Behavior should match String.indexOf()
  t.equal(
    b.indexOf('b', undefined),
    stringComparison.indexOf('b', undefined))
  t.equal(
    b.indexOf('b', {}),
    stringComparison.indexOf('b', {}))
  t.equal(
    b.indexOf('b', 0),
    stringComparison.indexOf('b', 0))
  t.equal(
    b.indexOf('b', null),
    stringComparison.indexOf('b', null))
  t.equal(
    b.indexOf('b', []),
    stringComparison.indexOf('b', []))
  t.equal(
    b.indexOf('b', [2]),
    stringComparison.indexOf('b', [2]))

  // test truncation of Number arguments to uint8
  {
    const buf = Buffer.from('this is a test')

    t.equal(buf.indexOf(0x6973), 3)
    t.equal(buf.indexOf(0x697320), 4)
    t.equal(buf.indexOf(0x69732069), 2)
    t.equal(buf.indexOf(0x697374657374), 0)
    t.equal(buf.indexOf(0x69737374), 0)
    t.equal(buf.indexOf(0x69737465), 11)
    t.equal(buf.indexOf(-140), 0)
    t.equal(buf.indexOf(-152), 1)
    t.equal(buf.indexOf(0xff), -1)
    t.equal(buf.indexOf(0xffff), -1)
  }

  // Test that Uint8Array arguments are okay.
  {
    const needle = new Uint8Array([0x66, 0x6f, 0x6f])
    const haystack = new BufferList(Buffer.from('a foo b foo'))
    t.equal(haystack.indexOf(needle), 2)
  }

  t.end()
})