/ contracts / lib / strings.sol
strings.sol
  1  /*
  2   * @title String & slice utility library for Solidity contracts.
  3   * @author Nick Johnson <arachnid@notdot.net>
  4   *
  5   * @dev Functionality in this library is largely implemented using an
  6   *      abstraction called a 'slice'. A slice represents a part of a string -
  7   *      anything from the entire string to a single character, or even no
  8   *      characters at all (a 0-length slice). Since a slice only has to specify
  9   *      an offset and a length, copying and manipulating slices is a lot less
 10   *      expensive than copying and manipulating the strings they reference.
 11   *
 12   *      To further reduce gas costs, most functions on slice that need to return
 13   *      a slice modify the original one instead of allocating a new one; for
 14   *      instance, `s.split(".")` will return the text up to the first '.',
 15   *      modifying s to only contain the remainder of the string after the '.'.
 16   *      In situations where you do not want to modify the original slice, you
 17   *      can make a copy first with `.copy()`, for example:
 18   *      `s.copy().split(".")`. Try and avoid using this idiom in loops; since
 19   *      Solidity has no memory management, it will result in allocating many
 20   *      short-lived slices that are later discarded.
 21   *
 22   *      Functions that return two slices come in two versions: a non-allocating
 23   *      version that takes the second slice as an argument, modifying it in
 24   *      place, and an allocating version that allocates and returns the second
 25   *      slice; see `nextRune` for example.
 26   *
 27   *      Functions that have to copy string data will return strings rather than
 28   *      slices; these can be cast back to slices for further processing if
 29   *      required.
 30   *
 31   *      For convenience, some functions are provided with non-modifying
 32   *      variants that create a new slice and return both; for instance,
 33   *      `s.splitNew('.')` leaves s unmodified, and returns two values
 34   *      corresponding to the left and right parts of the string.
 35   */
 36  pragma solidity ^0.4.11;
 37   
 38  library strings {
 39      struct slice {
 40          uint _len;
 41          uint _ptr;
 42      }
 43  
 44      function memcpy(uint dest, uint src, uint len) private {
 45          // Copy word-length chunks while possible
 46          for(; len >= 32; len -= 32) {
 47              assembly {
 48                  mstore(dest, mload(src))
 49              }
 50              dest += 32;
 51              src += 32;
 52          }
 53  
 54          // Copy remaining bytes
 55          uint mask = 256 ** (32 - len) - 1;
 56          assembly {
 57              let srcpart := and(mload(src), not(mask))
 58              let destpart := and(mload(dest), mask)
 59              mstore(dest, or(destpart, srcpart))
 60          }
 61      }
 62  
 63      /*
 64       * @dev Returns a slice containing the entire string.
 65       * @param self The string to make a slice from.
 66       * @return A newly allocated slice containing the entire string.
 67       */
 68      function toSlice(string self) internal returns (slice) {
 69          uint ptr;
 70          assembly {
 71              ptr := add(self, 0x20)
 72          }
 73          return slice(bytes(self).length, ptr);
 74      }
 75  
 76      /*
 77       * @dev Returns the length of a null-terminated bytes32 string.
 78       * @param self The value to find the length of.
 79       * @return The length of the string, from 0 to 32.
 80       */
 81      function len(bytes32 self) internal returns (uint) {
 82          uint ret;
 83          if (self == 0)
 84              return 0;
 85          if (self & 0xffffffffffffffffffffffffffffffff == 0) {
 86              ret += 16;
 87              self = bytes32(uint(self) / 0x100000000000000000000000000000000);
 88          }
 89          if (self & 0xffffffffffffffff == 0) {
 90              ret += 8;
 91              self = bytes32(uint(self) / 0x10000000000000000);
 92          }
 93          if (self & 0xffffffff == 0) {
 94              ret += 4;
 95              self = bytes32(uint(self) / 0x100000000);
 96          }
 97          if (self & 0xffff == 0) {
 98              ret += 2;
 99              self = bytes32(uint(self) / 0x10000);
100          }
101          if (self & 0xff == 0) {
102              ret += 1;
103          }
104          return 32 - ret;
105      }
106  
107      /*
108       * @dev Returns a slice containing the entire bytes32, interpreted as a
109       *      null-termintaed utf-8 string.
110       * @param self The bytes32 value to convert to a slice.
111       * @return A new slice containing the value of the input argument up to the
112       *         first null.
113       */
114      function toSliceB32(bytes32 self) internal returns (slice ret) {
115          // Allocate space for `self` in memory, copy it there, and point ret at it
116          assembly {
117              let ptr := mload(0x40)
118              mstore(0x40, add(ptr, 0x20))
119              mstore(ptr, self)
120              mstore(add(ret, 0x20), ptr)
121          }
122          ret._len = len(self);
123      }
124  
125      /*
126       * @dev Returns a new slice containing the same data as the current slice.
127       * @param self The slice to copy.
128       * @return A new slice containing the same data as `self`.
129       */
130      function copy(slice self) internal returns (slice) {
131          return slice(self._len, self._ptr);
132      }
133  
134      /*
135       * @dev Copies a slice to a new string.
136       * @param self The slice to copy.
137       * @return A newly allocated string containing the slice's text.
138       */
139      function toString(slice self) internal returns (string) {
140          var ret = new string(self._len);
141          uint retptr;
142          assembly { retptr := add(ret, 32) }
143  
144          memcpy(retptr, self._ptr, self._len);
145          return ret;
146      }
147  
148      /*
149       * @dev Returns the length in runes of the slice. Note that this operation
150       *      takes time proportional to the length of the slice; avoid using it
151       *      in loops, and call `slice.empty()` if you only need to know whether
152       *      the slice is empty or not.
153       * @param self The slice to operate on.
154       * @return The length of the slice in runes.
155       */
156      function len(slice self) internal returns (uint) {
157          // Starting at ptr-31 means the LSB will be the byte we care about
158          var ptr = self._ptr - 31;
159          var end = ptr + self._len;
160          for (uint len = 0; ptr < end; len++) {
161              uint8 b;
162              assembly { b := and(mload(ptr), 0xFF) }
163              if (b < 0x80) {
164                  ptr += 1;
165              } else if(b < 0xE0) {
166                  ptr += 2;
167              } else if(b < 0xF0) {
168                  ptr += 3;
169              } else if(b < 0xF8) {
170                  ptr += 4;
171              } else if(b < 0xFC) {
172                  ptr += 5;
173              } else {
174                  ptr += 6;
175              }
176          }
177          return len;
178      }
179  
180      /*
181       * @dev Returns true if the slice is empty (has a length of 0).
182       * @param self The slice to operate on.
183       * @return True if the slice is empty, False otherwise.
184       */
185      function empty(slice self) internal returns (bool) {
186          return self._len == 0;
187      }
188  
189      /*
190       * @dev Returns a positive number if `other` comes lexicographically after
191       *      `self`, a negative number if it comes before, or zero if the
192       *      contents of the two slices are equal. Comparison is done per-rune,
193       *      on unicode codepoints.
194       * @param self The first slice to compare.
195       * @param other The second slice to compare.
196       * @return The result of the comparison.
197       */
198      function compare(slice self, slice other) internal returns (int) {
199          uint shortest = self._len;
200          if (other._len < self._len)
201              shortest = other._len;
202  
203          var selfptr = self._ptr;
204          var otherptr = other._ptr;
205          for (uint idx = 0; idx < shortest; idx += 32) {
206              uint a;
207              uint b;
208              assembly {
209                  a := mload(selfptr)
210                  b := mload(otherptr)
211              }
212              if (a != b) {
213                  // Mask out irrelevant bytes and check again
214                  uint mask = ~(2 ** (8 * (32 - shortest + idx)) - 1);
215                  var diff = (a & mask) - (b & mask);
216                  if (diff != 0)
217                      return int(diff);
218              }
219              selfptr += 32;
220              otherptr += 32;
221          }
222          return int(self._len) - int(other._len);
223      }
224  
225      /*
226       * @dev Returns true if the two slices contain the same text.
227       * @param self The first slice to compare.
228       * @param self The second slice to compare.
229       * @return True if the slices are equal, false otherwise.
230       */
231      function equals(slice self, slice other) internal returns (bool) {
232          return compare(self, other) == 0;
233      }
234  
235      /*
236       * @dev Extracts the first rune in the slice into `rune`, advancing the
237       *      slice to point to the next rune and returning `self`.
238       * @param self The slice to operate on.
239       * @param rune The slice that will contain the first rune.
240       * @return `rune`.
241       */
242      function nextRune(slice self, slice rune) internal returns (slice) {
243          rune._ptr = self._ptr;
244  
245          if (self._len == 0) {
246              rune._len = 0;
247              return rune;
248          }
249  
250          uint len;
251          uint b;
252          // Load the first byte of the rune into the LSBs of b
253          assembly { b := and(mload(sub(mload(add(self, 32)), 31)), 0xFF) }
254          if (b < 0x80) {
255              len = 1;
256          } else if(b < 0xE0) {
257              len = 2;
258          } else if(b < 0xF0) {
259              len = 3;
260          } else {
261              len = 4;
262          }
263  
264          // Check for truncated codepoints
265          if (len > self._len) {
266              rune._len = self._len;
267              self._ptr += self._len;
268              self._len = 0;
269              return rune;
270          }
271  
272          self._ptr += len;
273          self._len -= len;
274          rune._len = len;
275          return rune;
276      }
277  
278      /*
279       * @dev Returns the first rune in the slice, advancing the slice to point
280       *      to the next rune.
281       * @param self The slice to operate on.
282       * @return A slice containing only the first rune from `self`.
283       */
284      function nextRune(slice self) internal returns (slice ret) {
285          nextRune(self, ret);
286      }
287  
288      /*
289       * @dev Returns the number of the first codepoint in the slice.
290       * @param self The slice to operate on.
291       * @return The number of the first codepoint in the slice.
292       */
293      function ord(slice self) internal returns (uint ret) {
294          if (self._len == 0) {
295              return 0;
296          }
297  
298          uint word;
299          uint len;
300          uint div = 2 ** 248;
301  
302          // Load the rune into the MSBs of b
303          assembly { word:= mload(mload(add(self, 32))) }
304          var b = word / div;
305          if (b < 0x80) {
306              ret = b;
307              len = 1;
308          } else if(b < 0xE0) {
309              ret = b & 0x1F;
310              len = 2;
311          } else if(b < 0xF0) {
312              ret = b & 0x0F;
313              len = 3;
314          } else {
315              ret = b & 0x07;
316              len = 4;
317          }
318  
319          // Check for truncated codepoints
320          if (len > self._len) {
321              return 0;
322          }
323  
324          for (uint i = 1; i < len; i++) {
325              div = div / 256;
326              b = (word / div) & 0xFF;
327              if (b & 0xC0 != 0x80) {
328                  // Invalid UTF-8 sequence
329                  return 0;
330              }
331              ret = (ret * 64) | (b & 0x3F);
332          }
333  
334          return ret;
335      }
336  
337      /*
338       * @dev Returns the keccak-256 hash of the slice.
339       * @param self The slice to hash.
340       * @return The hash of the slice.
341       */
342      function keccak(slice self) internal returns (bytes32 ret) {
343          assembly {
344              ret := sha3(mload(add(self, 32)), mload(self))
345          }
346      }
347  
348      /*
349       * @dev Returns true if `self` starts with `needle`.
350       * @param self The slice to operate on.
351       * @param needle The slice to search for.
352       * @return True if the slice starts with the provided text, false otherwise.
353       */
354      function startsWith(slice self, slice needle) internal returns (bool) {
355          if (self._len < needle._len) {
356              return false;
357          }
358  
359          if (self._ptr == needle._ptr) {
360              return true;
361          }
362  
363          bool equal;
364          assembly {
365              let len := mload(needle)
366              let selfptr := mload(add(self, 0x20))
367              let needleptr := mload(add(needle, 0x20))
368              equal := eq(sha3(selfptr, len), sha3(needleptr, len))
369          }
370          return equal;
371      }
372  
373      /*
374       * @dev If `self` starts with `needle`, `needle` is removed from the
375       *      beginning of `self`. Otherwise, `self` is unmodified.
376       * @param self The slice to operate on.
377       * @param needle The slice to search for.
378       * @return `self`
379       */
380      function beyond(slice self, slice needle) internal returns (slice) {
381          if (self._len < needle._len) {
382              return self;
383          }
384  
385          bool equal = true;
386          if (self._ptr != needle._ptr) {
387              assembly {
388                  let len := mload(needle)
389                  let selfptr := mload(add(self, 0x20))
390                  let needleptr := mload(add(needle, 0x20))
391                  equal := eq(sha3(selfptr, len), sha3(needleptr, len))
392              }
393          }
394  
395          if (equal) {
396              self._len -= needle._len;
397              self._ptr += needle._len;
398          }
399  
400          return self;
401      }
402  
403      /*
404       * @dev Returns true if the slice ends with `needle`.
405       * @param self The slice to operate on.
406       * @param needle The slice to search for.
407       * @return True if the slice starts with the provided text, false otherwise.
408       */
409      function endsWith(slice self, slice needle) internal returns (bool) {
410          if (self._len < needle._len) {
411              return false;
412          }
413  
414          var selfptr = self._ptr + self._len - needle._len;
415  
416          if (selfptr == needle._ptr) {
417              return true;
418          }
419  
420          bool equal;
421          assembly {
422              let len := mload(needle)
423              let needleptr := mload(add(needle, 0x20))
424              equal := eq(sha3(selfptr, len), sha3(needleptr, len))
425          }
426  
427          return equal;
428      }
429  
430      /*
431       * @dev If `self` ends with `needle`, `needle` is removed from the
432       *      end of `self`. Otherwise, `self` is unmodified.
433       * @param self The slice to operate on.
434       * @param needle The slice to search for.
435       * @return `self`
436       */
437      function until(slice self, slice needle) internal returns (slice) {
438          if (self._len < needle._len) {
439              return self;
440          }
441  
442          var selfptr = self._ptr + self._len - needle._len;
443          bool equal = true;
444          if (selfptr != needle._ptr) {
445              assembly {
446                  let len := mload(needle)
447                  let needleptr := mload(add(needle, 0x20))
448                  equal := eq(sha3(selfptr, len), sha3(needleptr, len))
449              }
450          }
451  
452          if (equal) {
453              self._len -= needle._len;
454          }
455  
456          return self;
457      }
458  
459      // Returns the memory address of the first byte of the first occurrence of
460      // `needle` in `self`, or the first byte after `self` if not found.
461      function findPtr(uint selflen, uint selfptr, uint needlelen, uint needleptr) private returns (uint) {
462          uint ptr;
463          uint idx;
464  
465          if (needlelen <= selflen) {
466              if (needlelen <= 32) {
467                  // Optimized assembly for 68 gas per byte on short strings
468                  assembly {
469                      let mask := not(sub(exp(2, mul(8, sub(32, needlelen))), 1))
470                      let needledata := and(mload(needleptr), mask)
471                      let end := add(selfptr, sub(selflen, needlelen))
472                      ptr := selfptr
473                      loop:
474                      jumpi(exit, eq(and(mload(ptr), mask), needledata))
475                      ptr := add(ptr, 1)
476                      jumpi(loop, lt(sub(ptr, 1), end))
477                      ptr := add(selfptr, selflen)
478                      exit:
479                  }
480                  return ptr;
481              } else {
482                  // For long needles, use hashing
483                  bytes32 hash;
484                  assembly { hash := sha3(needleptr, needlelen) }
485                  ptr = selfptr;
486                  for (idx = 0; idx <= selflen - needlelen; idx++) {
487                      bytes32 testHash;
488                      assembly { testHash := sha3(ptr, needlelen) }
489                      if (hash == testHash)
490                          return ptr;
491                      ptr += 1;
492                  }
493              }
494          }
495          return selfptr + selflen;
496      }
497  
498      // Returns the memory address of the first byte after the last occurrence of
499      // `needle` in `self`, or the address of `self` if not found.
500      function rfindPtr(uint selflen, uint selfptr, uint needlelen, uint needleptr) private returns (uint) {
501          uint ptr;
502  
503          if (needlelen <= selflen) {
504              if (needlelen <= 32) {
505                  // Optimized assembly for 69 gas per byte on short strings
506                  assembly {
507                      let mask := not(sub(exp(2, mul(8, sub(32, needlelen))), 1))
508                      let needledata := and(mload(needleptr), mask)
509                      ptr := add(selfptr, sub(selflen, needlelen))
510                      loop:
511                      jumpi(ret, eq(and(mload(ptr), mask), needledata))
512                      ptr := sub(ptr, 1)
513                      jumpi(loop, gt(add(ptr, 1), selfptr))
514                      ptr := selfptr
515                      jump(exit)
516                      ret:
517                      ptr := add(ptr, needlelen)
518                      exit:
519                  }
520                  return ptr;
521              } else {
522                  // For long needles, use hashing
523                  bytes32 hash;
524                  assembly { hash := sha3(needleptr, needlelen) }
525                  ptr = selfptr + (selflen - needlelen);
526                  while (ptr >= selfptr) {
527                      bytes32 testHash;
528                      assembly { testHash := sha3(ptr, needlelen) }
529                      if (hash == testHash)
530                          return ptr + needlelen;
531                      ptr -= 1;
532                  }
533              }
534          }
535          return selfptr;
536      }
537  
538      /*
539       * @dev Modifies `self` to contain everything from the first occurrence of
540       *      `needle` to the end of the slice. `self` is set to the empty slice
541       *      if `needle` is not found.
542       * @param self The slice to search and modify.
543       * @param needle The text to search for.
544       * @return `self`.
545       */
546      function find(slice self, slice needle) internal returns (slice) {
547          uint ptr = findPtr(self._len, self._ptr, needle._len, needle._ptr);
548          self._len -= ptr - self._ptr;
549          self._ptr = ptr;
550          return self;
551      }
552  
553      /*
554       * @dev Modifies `self` to contain the part of the string from the start of
555       *      `self` to the end of the first occurrence of `needle`. If `needle`
556       *      is not found, `self` is set to the empty slice.
557       * @param self The slice to search and modify.
558       * @param needle The text to search for.
559       * @return `self`.
560       */
561      function rfind(slice self, slice needle) internal returns (slice) {
562          uint ptr = rfindPtr(self._len, self._ptr, needle._len, needle._ptr);
563          self._len = ptr - self._ptr;
564          return self;
565      }
566  
567      /*
568       * @dev Splits the slice, setting `self` to everything after the first
569       *      occurrence of `needle`, and `token` to everything before it. If
570       *      `needle` does not occur in `self`, `self` is set to the empty slice,
571       *      and `token` is set to the entirety of `self`.
572       * @param self The slice to split.
573       * @param needle The text to search for in `self`.
574       * @param token An output parameter to which the first token is written.
575       * @return `token`.
576       */
577      function split(slice self, slice needle, slice token) internal returns (slice) {
578          uint ptr = findPtr(self._len, self._ptr, needle._len, needle._ptr);
579          token._ptr = self._ptr;
580          token._len = ptr - self._ptr;
581          if (ptr == self._ptr + self._len) {
582              // Not found
583              self._len = 0;
584          } else {
585              self._len -= token._len + needle._len;
586              self._ptr = ptr + needle._len;
587          }
588          return token;
589      }
590  
591      /*
592       * @dev Splits the slice, setting `self` to everything after the first
593       *      occurrence of `needle`, and returning everything before it. If
594       *      `needle` does not occur in `self`, `self` is set to the empty slice,
595       *      and the entirety of `self` is returned.
596       * @param self The slice to split.
597       * @param needle The text to search for in `self`.
598       * @return The part of `self` up to the first occurrence of `delim`.
599       */
600      function split(slice self, slice needle) internal returns (slice token) {
601          split(self, needle, token);
602      }
603  
604      /*
605       * @dev Splits the slice, setting `self` to everything before the last
606       *      occurrence of `needle`, and `token` to everything after it. If
607       *      `needle` does not occur in `self`, `self` is set to the empty slice,
608       *      and `token` is set to the entirety of `self`.
609       * @param self The slice to split.
610       * @param needle The text to search for in `self`.
611       * @param token An output parameter to which the first token is written.
612       * @return `token`.
613       */
614      function rsplit(slice self, slice needle, slice token) internal returns (slice) {
615          uint ptr = rfindPtr(self._len, self._ptr, needle._len, needle._ptr);
616          token._ptr = ptr;
617          token._len = self._len - (ptr - self._ptr);
618          if (ptr == self._ptr) {
619              // Not found
620              self._len = 0;
621          } else {
622              self._len -= token._len + needle._len;
623          }
624          return token;
625      }
626  
627      /*
628       * @dev Splits the slice, setting `self` to everything before the last
629       *      occurrence of `needle`, and returning everything after it. If
630       *      `needle` does not occur in `self`, `self` is set to the empty slice,
631       *      and the entirety of `self` is returned.
632       * @param self The slice to split.
633       * @param needle The text to search for in `self`.
634       * @return The part of `self` after the last occurrence of `delim`.
635       */
636      function rsplit(slice self, slice needle) internal returns (slice token) {
637          rsplit(self, needle, token);
638      }
639  
640      /*
641       * @dev Counts the number of nonoverlapping occurrences of `needle` in `self`.
642       * @param self The slice to search.
643       * @param needle The text to search for in `self`.
644       * @return The number of occurrences of `needle` found in `self`.
645       */
646      function count(slice self, slice needle) internal returns (uint count) {
647          uint ptr = findPtr(self._len, self._ptr, needle._len, needle._ptr) + needle._len;
648          while (ptr <= self._ptr + self._len) {
649              count++;
650              ptr = findPtr(self._len - (ptr - self._ptr), ptr, needle._len, needle._ptr) + needle._len;
651          }
652      }
653  
654      /*
655       * @dev Returns True if `self` contains `needle`.
656       * @param self The slice to search.
657       * @param needle The text to search for in `self`.
658       * @return True if `needle` is found in `self`, false otherwise.
659       */
660      function contains(slice self, slice needle) internal returns (bool) {
661          return rfindPtr(self._len, self._ptr, needle._len, needle._ptr) != self._ptr;
662      }
663  
664      /*
665       * @dev Returns a newly allocated string containing the concatenation of
666       *      `self` and `other`.
667       * @param self The first slice to concatenate.
668       * @param other The second slice to concatenate.
669       * @return The concatenation of the two strings.
670       */
671      function concat(slice self, slice other) internal returns (string) {
672          var ret = new string(self._len + other._len);
673          uint retptr;
674          assembly { retptr := add(ret, 32) }
675          memcpy(retptr, self._ptr, self._len);
676          memcpy(retptr + self._len, other._ptr, other._len);
677          return ret;
678      }
679  
680      /*
681       * @dev Joins an array of slices, using `self` as a delimiter, returning a
682       *      newly allocated string.
683       * @param self The delimiter to use.
684       * @param parts A list of slices to join.
685       * @return A newly allocated string containing all the slices in `parts`,
686       *         joined with `self`.
687       */
688      function join(slice self, slice[] parts) internal returns (string) {
689          if (parts.length == 0)
690              return "";
691  
692          uint len = self._len * (parts.length - 1);
693          for(uint i = 0; i < parts.length; i++)
694              len += parts[i]._len;
695  
696          var ret = new string(len);
697          uint retptr;
698          assembly { retptr := add(ret, 32) }
699  
700          for(i = 0; i < parts.length; i++) {
701              memcpy(retptr, parts[i]._ptr, parts[i]._len);
702              retptr += parts[i]._len;
703              if (i < parts.length - 1) {
704                  memcpy(retptr, self._ptr, self._len);
705                  retptr += self._len;
706              }
707          }
708  
709          return ret;
710      }
711  }