strings.sol
1 /* 2 * @title String & slice utility library for Solidity contracts. 3 * @author Nick Johnson <arachnid@notdot.net> 4 * 5 * @dev Functionality in this library is largely implemented using an 6 * abstraction called a 'slice'. A slice represents a part of a string - 7 * anything from the entire string to a single character, or even no 8 * characters at all (a 0-length slice). Since a slice only has to specify 9 * an offset and a length, copying and manipulating slices is a lot less 10 * expensive than copying and manipulating the strings they reference. 11 * 12 * To further reduce gas costs, most functions on slice that need to return 13 * a slice modify the original one instead of allocating a new one; for 14 * instance, `s.split(".")` will return the text up to the first '.', 15 * modifying s to only contain the remainder of the string after the '.'. 16 * In situations where you do not want to modify the original slice, you 17 * can make a copy first with `.copy()`, for example: 18 * `s.copy().split(".")`. Try and avoid using this idiom in loops; since 19 * Solidity has no memory management, it will result in allocating many 20 * short-lived slices that are later discarded. 21 * 22 * Functions that return two slices come in two versions: a non-allocating 23 * version that takes the second slice as an argument, modifying it in 24 * place, and an allocating version that allocates and returns the second 25 * slice; see `nextRune` for example. 26 * 27 * Functions that have to copy string data will return strings rather than 28 * slices; these can be cast back to slices for further processing if 29 * required. 30 * 31 * For convenience, some functions are provided with non-modifying 32 * variants that create a new slice and return both; for instance, 33 * `s.splitNew('.')` leaves s unmodified, and returns two values 34 * corresponding to the left and right parts of the string. 35 */ 36 pragma solidity ^0.4.11; 37 38 library strings { 39 struct slice { 40 uint _len; 41 uint _ptr; 42 } 43 44 function memcpy(uint dest, uint src, uint len) private { 45 // Copy word-length chunks while possible 46 for(; len >= 32; len -= 32) { 47 assembly { 48 mstore(dest, mload(src)) 49 } 50 dest += 32; 51 src += 32; 52 } 53 54 // Copy remaining bytes 55 uint mask = 256 ** (32 - len) - 1; 56 assembly { 57 let srcpart := and(mload(src), not(mask)) 58 let destpart := and(mload(dest), mask) 59 mstore(dest, or(destpart, srcpart)) 60 } 61 } 62 63 /* 64 * @dev Returns a slice containing the entire string. 65 * @param self The string to make a slice from. 66 * @return A newly allocated slice containing the entire string. 67 */ 68 function toSlice(string self) internal returns (slice) { 69 uint ptr; 70 assembly { 71 ptr := add(self, 0x20) 72 } 73 return slice(bytes(self).length, ptr); 74 } 75 76 /* 77 * @dev Returns the length of a null-terminated bytes32 string. 78 * @param self The value to find the length of. 79 * @return The length of the string, from 0 to 32. 80 */ 81 function len(bytes32 self) internal returns (uint) { 82 uint ret; 83 if (self == 0) 84 return 0; 85 if (self & 0xffffffffffffffffffffffffffffffff == 0) { 86 ret += 16; 87 self = bytes32(uint(self) / 0x100000000000000000000000000000000); 88 } 89 if (self & 0xffffffffffffffff == 0) { 90 ret += 8; 91 self = bytes32(uint(self) / 0x10000000000000000); 92 } 93 if (self & 0xffffffff == 0) { 94 ret += 4; 95 self = bytes32(uint(self) / 0x100000000); 96 } 97 if (self & 0xffff == 0) { 98 ret += 2; 99 self = bytes32(uint(self) / 0x10000); 100 } 101 if (self & 0xff == 0) { 102 ret += 1; 103 } 104 return 32 - ret; 105 } 106 107 /* 108 * @dev Returns a slice containing the entire bytes32, interpreted as a 109 * null-termintaed utf-8 string. 110 * @param self The bytes32 value to convert to a slice. 111 * @return A new slice containing the value of the input argument up to the 112 * first null. 113 */ 114 function toSliceB32(bytes32 self) internal returns (slice ret) { 115 // Allocate space for `self` in memory, copy it there, and point ret at it 116 assembly { 117 let ptr := mload(0x40) 118 mstore(0x40, add(ptr, 0x20)) 119 mstore(ptr, self) 120 mstore(add(ret, 0x20), ptr) 121 } 122 ret._len = len(self); 123 } 124 125 /* 126 * @dev Returns a new slice containing the same data as the current slice. 127 * @param self The slice to copy. 128 * @return A new slice containing the same data as `self`. 129 */ 130 function copy(slice self) internal returns (slice) { 131 return slice(self._len, self._ptr); 132 } 133 134 /* 135 * @dev Copies a slice to a new string. 136 * @param self The slice to copy. 137 * @return A newly allocated string containing the slice's text. 138 */ 139 function toString(slice self) internal returns (string) { 140 var ret = new string(self._len); 141 uint retptr; 142 assembly { retptr := add(ret, 32) } 143 144 memcpy(retptr, self._ptr, self._len); 145 return ret; 146 } 147 148 /* 149 * @dev Returns the length in runes of the slice. Note that this operation 150 * takes time proportional to the length of the slice; avoid using it 151 * in loops, and call `slice.empty()` if you only need to know whether 152 * the slice is empty or not. 153 * @param self The slice to operate on. 154 * @return The length of the slice in runes. 155 */ 156 function len(slice self) internal returns (uint) { 157 // Starting at ptr-31 means the LSB will be the byte we care about 158 var ptr = self._ptr - 31; 159 var end = ptr + self._len; 160 for (uint len = 0; ptr < end; len++) { 161 uint8 b; 162 assembly { b := and(mload(ptr), 0xFF) } 163 if (b < 0x80) { 164 ptr += 1; 165 } else if(b < 0xE0) { 166 ptr += 2; 167 } else if(b < 0xF0) { 168 ptr += 3; 169 } else if(b < 0xF8) { 170 ptr += 4; 171 } else if(b < 0xFC) { 172 ptr += 5; 173 } else { 174 ptr += 6; 175 } 176 } 177 return len; 178 } 179 180 /* 181 * @dev Returns true if the slice is empty (has a length of 0). 182 * @param self The slice to operate on. 183 * @return True if the slice is empty, False otherwise. 184 */ 185 function empty(slice self) internal returns (bool) { 186 return self._len == 0; 187 } 188 189 /* 190 * @dev Returns a positive number if `other` comes lexicographically after 191 * `self`, a negative number if it comes before, or zero if the 192 * contents of the two slices are equal. Comparison is done per-rune, 193 * on unicode codepoints. 194 * @param self The first slice to compare. 195 * @param other The second slice to compare. 196 * @return The result of the comparison. 197 */ 198 function compare(slice self, slice other) internal returns (int) { 199 uint shortest = self._len; 200 if (other._len < self._len) 201 shortest = other._len; 202 203 var selfptr = self._ptr; 204 var otherptr = other._ptr; 205 for (uint idx = 0; idx < shortest; idx += 32) { 206 uint a; 207 uint b; 208 assembly { 209 a := mload(selfptr) 210 b := mload(otherptr) 211 } 212 if (a != b) { 213 // Mask out irrelevant bytes and check again 214 uint mask = ~(2 ** (8 * (32 - shortest + idx)) - 1); 215 var diff = (a & mask) - (b & mask); 216 if (diff != 0) 217 return int(diff); 218 } 219 selfptr += 32; 220 otherptr += 32; 221 } 222 return int(self._len) - int(other._len); 223 } 224 225 /* 226 * @dev Returns true if the two slices contain the same text. 227 * @param self The first slice to compare. 228 * @param self The second slice to compare. 229 * @return True if the slices are equal, false otherwise. 230 */ 231 function equals(slice self, slice other) internal returns (bool) { 232 return compare(self, other) == 0; 233 } 234 235 /* 236 * @dev Extracts the first rune in the slice into `rune`, advancing the 237 * slice to point to the next rune and returning `self`. 238 * @param self The slice to operate on. 239 * @param rune The slice that will contain the first rune. 240 * @return `rune`. 241 */ 242 function nextRune(slice self, slice rune) internal returns (slice) { 243 rune._ptr = self._ptr; 244 245 if (self._len == 0) { 246 rune._len = 0; 247 return rune; 248 } 249 250 uint len; 251 uint b; 252 // Load the first byte of the rune into the LSBs of b 253 assembly { b := and(mload(sub(mload(add(self, 32)), 31)), 0xFF) } 254 if (b < 0x80) { 255 len = 1; 256 } else if(b < 0xE0) { 257 len = 2; 258 } else if(b < 0xF0) { 259 len = 3; 260 } else { 261 len = 4; 262 } 263 264 // Check for truncated codepoints 265 if (len > self._len) { 266 rune._len = self._len; 267 self._ptr += self._len; 268 self._len = 0; 269 return rune; 270 } 271 272 self._ptr += len; 273 self._len -= len; 274 rune._len = len; 275 return rune; 276 } 277 278 /* 279 * @dev Returns the first rune in the slice, advancing the slice to point 280 * to the next rune. 281 * @param self The slice to operate on. 282 * @return A slice containing only the first rune from `self`. 283 */ 284 function nextRune(slice self) internal returns (slice ret) { 285 nextRune(self, ret); 286 } 287 288 /* 289 * @dev Returns the number of the first codepoint in the slice. 290 * @param self The slice to operate on. 291 * @return The number of the first codepoint in the slice. 292 */ 293 function ord(slice self) internal returns (uint ret) { 294 if (self._len == 0) { 295 return 0; 296 } 297 298 uint word; 299 uint len; 300 uint div = 2 ** 248; 301 302 // Load the rune into the MSBs of b 303 assembly { word:= mload(mload(add(self, 32))) } 304 var b = word / div; 305 if (b < 0x80) { 306 ret = b; 307 len = 1; 308 } else if(b < 0xE0) { 309 ret = b & 0x1F; 310 len = 2; 311 } else if(b < 0xF0) { 312 ret = b & 0x0F; 313 len = 3; 314 } else { 315 ret = b & 0x07; 316 len = 4; 317 } 318 319 // Check for truncated codepoints 320 if (len > self._len) { 321 return 0; 322 } 323 324 for (uint i = 1; i < len; i++) { 325 div = div / 256; 326 b = (word / div) & 0xFF; 327 if (b & 0xC0 != 0x80) { 328 // Invalid UTF-8 sequence 329 return 0; 330 } 331 ret = (ret * 64) | (b & 0x3F); 332 } 333 334 return ret; 335 } 336 337 /* 338 * @dev Returns the keccak-256 hash of the slice. 339 * @param self The slice to hash. 340 * @return The hash of the slice. 341 */ 342 function keccak(slice self) internal returns (bytes32 ret) { 343 assembly { 344 ret := sha3(mload(add(self, 32)), mload(self)) 345 } 346 } 347 348 /* 349 * @dev Returns true if `self` starts with `needle`. 350 * @param self The slice to operate on. 351 * @param needle The slice to search for. 352 * @return True if the slice starts with the provided text, false otherwise. 353 */ 354 function startsWith(slice self, slice needle) internal returns (bool) { 355 if (self._len < needle._len) { 356 return false; 357 } 358 359 if (self._ptr == needle._ptr) { 360 return true; 361 } 362 363 bool equal; 364 assembly { 365 let len := mload(needle) 366 let selfptr := mload(add(self, 0x20)) 367 let needleptr := mload(add(needle, 0x20)) 368 equal := eq(sha3(selfptr, len), sha3(needleptr, len)) 369 } 370 return equal; 371 } 372 373 /* 374 * @dev If `self` starts with `needle`, `needle` is removed from the 375 * beginning of `self`. Otherwise, `self` is unmodified. 376 * @param self The slice to operate on. 377 * @param needle The slice to search for. 378 * @return `self` 379 */ 380 function beyond(slice self, slice needle) internal returns (slice) { 381 if (self._len < needle._len) { 382 return self; 383 } 384 385 bool equal = true; 386 if (self._ptr != needle._ptr) { 387 assembly { 388 let len := mload(needle) 389 let selfptr := mload(add(self, 0x20)) 390 let needleptr := mload(add(needle, 0x20)) 391 equal := eq(sha3(selfptr, len), sha3(needleptr, len)) 392 } 393 } 394 395 if (equal) { 396 self._len -= needle._len; 397 self._ptr += needle._len; 398 } 399 400 return self; 401 } 402 403 /* 404 * @dev Returns true if the slice ends with `needle`. 405 * @param self The slice to operate on. 406 * @param needle The slice to search for. 407 * @return True if the slice starts with the provided text, false otherwise. 408 */ 409 function endsWith(slice self, slice needle) internal returns (bool) { 410 if (self._len < needle._len) { 411 return false; 412 } 413 414 var selfptr = self._ptr + self._len - needle._len; 415 416 if (selfptr == needle._ptr) { 417 return true; 418 } 419 420 bool equal; 421 assembly { 422 let len := mload(needle) 423 let needleptr := mload(add(needle, 0x20)) 424 equal := eq(sha3(selfptr, len), sha3(needleptr, len)) 425 } 426 427 return equal; 428 } 429 430 /* 431 * @dev If `self` ends with `needle`, `needle` is removed from the 432 * end of `self`. Otherwise, `self` is unmodified. 433 * @param self The slice to operate on. 434 * @param needle The slice to search for. 435 * @return `self` 436 */ 437 function until(slice self, slice needle) internal returns (slice) { 438 if (self._len < needle._len) { 439 return self; 440 } 441 442 var selfptr = self._ptr + self._len - needle._len; 443 bool equal = true; 444 if (selfptr != needle._ptr) { 445 assembly { 446 let len := mload(needle) 447 let needleptr := mload(add(needle, 0x20)) 448 equal := eq(sha3(selfptr, len), sha3(needleptr, len)) 449 } 450 } 451 452 if (equal) { 453 self._len -= needle._len; 454 } 455 456 return self; 457 } 458 459 // Returns the memory address of the first byte of the first occurrence of 460 // `needle` in `self`, or the first byte after `self` if not found. 461 function findPtr(uint selflen, uint selfptr, uint needlelen, uint needleptr) private returns (uint) { 462 uint ptr; 463 uint idx; 464 465 if (needlelen <= selflen) { 466 if (needlelen <= 32) { 467 // Optimized assembly for 68 gas per byte on short strings 468 assembly { 469 let mask := not(sub(exp(2, mul(8, sub(32, needlelen))), 1)) 470 let needledata := and(mload(needleptr), mask) 471 let end := add(selfptr, sub(selflen, needlelen)) 472 ptr := selfptr 473 loop: 474 jumpi(exit, eq(and(mload(ptr), mask), needledata)) 475 ptr := add(ptr, 1) 476 jumpi(loop, lt(sub(ptr, 1), end)) 477 ptr := add(selfptr, selflen) 478 exit: 479 } 480 return ptr; 481 } else { 482 // For long needles, use hashing 483 bytes32 hash; 484 assembly { hash := sha3(needleptr, needlelen) } 485 ptr = selfptr; 486 for (idx = 0; idx <= selflen - needlelen; idx++) { 487 bytes32 testHash; 488 assembly { testHash := sha3(ptr, needlelen) } 489 if (hash == testHash) 490 return ptr; 491 ptr += 1; 492 } 493 } 494 } 495 return selfptr + selflen; 496 } 497 498 // Returns the memory address of the first byte after the last occurrence of 499 // `needle` in `self`, or the address of `self` if not found. 500 function rfindPtr(uint selflen, uint selfptr, uint needlelen, uint needleptr) private returns (uint) { 501 uint ptr; 502 503 if (needlelen <= selflen) { 504 if (needlelen <= 32) { 505 // Optimized assembly for 69 gas per byte on short strings 506 assembly { 507 let mask := not(sub(exp(2, mul(8, sub(32, needlelen))), 1)) 508 let needledata := and(mload(needleptr), mask) 509 ptr := add(selfptr, sub(selflen, needlelen)) 510 loop: 511 jumpi(ret, eq(and(mload(ptr), mask), needledata)) 512 ptr := sub(ptr, 1) 513 jumpi(loop, gt(add(ptr, 1), selfptr)) 514 ptr := selfptr 515 jump(exit) 516 ret: 517 ptr := add(ptr, needlelen) 518 exit: 519 } 520 return ptr; 521 } else { 522 // For long needles, use hashing 523 bytes32 hash; 524 assembly { hash := sha3(needleptr, needlelen) } 525 ptr = selfptr + (selflen - needlelen); 526 while (ptr >= selfptr) { 527 bytes32 testHash; 528 assembly { testHash := sha3(ptr, needlelen) } 529 if (hash == testHash) 530 return ptr + needlelen; 531 ptr -= 1; 532 } 533 } 534 } 535 return selfptr; 536 } 537 538 /* 539 * @dev Modifies `self` to contain everything from the first occurrence of 540 * `needle` to the end of the slice. `self` is set to the empty slice 541 * if `needle` is not found. 542 * @param self The slice to search and modify. 543 * @param needle The text to search for. 544 * @return `self`. 545 */ 546 function find(slice self, slice needle) internal returns (slice) { 547 uint ptr = findPtr(self._len, self._ptr, needle._len, needle._ptr); 548 self._len -= ptr - self._ptr; 549 self._ptr = ptr; 550 return self; 551 } 552 553 /* 554 * @dev Modifies `self` to contain the part of the string from the start of 555 * `self` to the end of the first occurrence of `needle`. If `needle` 556 * is not found, `self` is set to the empty slice. 557 * @param self The slice to search and modify. 558 * @param needle The text to search for. 559 * @return `self`. 560 */ 561 function rfind(slice self, slice needle) internal returns (slice) { 562 uint ptr = rfindPtr(self._len, self._ptr, needle._len, needle._ptr); 563 self._len = ptr - self._ptr; 564 return self; 565 } 566 567 /* 568 * @dev Splits the slice, setting `self` to everything after the first 569 * occurrence of `needle`, and `token` to everything before it. If 570 * `needle` does not occur in `self`, `self` is set to the empty slice, 571 * and `token` is set to the entirety of `self`. 572 * @param self The slice to split. 573 * @param needle The text to search for in `self`. 574 * @param token An output parameter to which the first token is written. 575 * @return `token`. 576 */ 577 function split(slice self, slice needle, slice token) internal returns (slice) { 578 uint ptr = findPtr(self._len, self._ptr, needle._len, needle._ptr); 579 token._ptr = self._ptr; 580 token._len = ptr - self._ptr; 581 if (ptr == self._ptr + self._len) { 582 // Not found 583 self._len = 0; 584 } else { 585 self._len -= token._len + needle._len; 586 self._ptr = ptr + needle._len; 587 } 588 return token; 589 } 590 591 /* 592 * @dev Splits the slice, setting `self` to everything after the first 593 * occurrence of `needle`, and returning everything before it. If 594 * `needle` does not occur in `self`, `self` is set to the empty slice, 595 * and the entirety of `self` is returned. 596 * @param self The slice to split. 597 * @param needle The text to search for in `self`. 598 * @return The part of `self` up to the first occurrence of `delim`. 599 */ 600 function split(slice self, slice needle) internal returns (slice token) { 601 split(self, needle, token); 602 } 603 604 /* 605 * @dev Splits the slice, setting `self` to everything before the last 606 * occurrence of `needle`, and `token` to everything after it. If 607 * `needle` does not occur in `self`, `self` is set to the empty slice, 608 * and `token` is set to the entirety of `self`. 609 * @param self The slice to split. 610 * @param needle The text to search for in `self`. 611 * @param token An output parameter to which the first token is written. 612 * @return `token`. 613 */ 614 function rsplit(slice self, slice needle, slice token) internal returns (slice) { 615 uint ptr = rfindPtr(self._len, self._ptr, needle._len, needle._ptr); 616 token._ptr = ptr; 617 token._len = self._len - (ptr - self._ptr); 618 if (ptr == self._ptr) { 619 // Not found 620 self._len = 0; 621 } else { 622 self._len -= token._len + needle._len; 623 } 624 return token; 625 } 626 627 /* 628 * @dev Splits the slice, setting `self` to everything before the last 629 * occurrence of `needle`, and returning everything after it. If 630 * `needle` does not occur in `self`, `self` is set to the empty slice, 631 * and the entirety of `self` is returned. 632 * @param self The slice to split. 633 * @param needle The text to search for in `self`. 634 * @return The part of `self` after the last occurrence of `delim`. 635 */ 636 function rsplit(slice self, slice needle) internal returns (slice token) { 637 rsplit(self, needle, token); 638 } 639 640 /* 641 * @dev Counts the number of nonoverlapping occurrences of `needle` in `self`. 642 * @param self The slice to search. 643 * @param needle The text to search for in `self`. 644 * @return The number of occurrences of `needle` found in `self`. 645 */ 646 function count(slice self, slice needle) internal returns (uint count) { 647 uint ptr = findPtr(self._len, self._ptr, needle._len, needle._ptr) + needle._len; 648 while (ptr <= self._ptr + self._len) { 649 count++; 650 ptr = findPtr(self._len - (ptr - self._ptr), ptr, needle._len, needle._ptr) + needle._len; 651 } 652 } 653 654 /* 655 * @dev Returns True if `self` contains `needle`. 656 * @param self The slice to search. 657 * @param needle The text to search for in `self`. 658 * @return True if `needle` is found in `self`, false otherwise. 659 */ 660 function contains(slice self, slice needle) internal returns (bool) { 661 return rfindPtr(self._len, self._ptr, needle._len, needle._ptr) != self._ptr; 662 } 663 664 /* 665 * @dev Returns a newly allocated string containing the concatenation of 666 * `self` and `other`. 667 * @param self The first slice to concatenate. 668 * @param other The second slice to concatenate. 669 * @return The concatenation of the two strings. 670 */ 671 function concat(slice self, slice other) internal returns (string) { 672 var ret = new string(self._len + other._len); 673 uint retptr; 674 assembly { retptr := add(ret, 32) } 675 memcpy(retptr, self._ptr, self._len); 676 memcpy(retptr + self._len, other._ptr, other._len); 677 return ret; 678 } 679 680 /* 681 * @dev Joins an array of slices, using `self` as a delimiter, returning a 682 * newly allocated string. 683 * @param self The delimiter to use. 684 * @param parts A list of slices to join. 685 * @return A newly allocated string containing all the slices in `parts`, 686 * joined with `self`. 687 */ 688 function join(slice self, slice[] parts) internal returns (string) { 689 if (parts.length == 0) 690 return ""; 691 692 uint len = self._len * (parts.length - 1); 693 for(uint i = 0; i < parts.length; i++) 694 len += parts[i]._len; 695 696 var ret = new string(len); 697 uint retptr; 698 assembly { retptr := add(ret, 32) } 699 700 for(i = 0; i < parts.length; i++) { 701 memcpy(retptr, parts[i]._ptr, parts[i]._len); 702 retptr += parts[i]._len; 703 if (i < parts.length - 1) { 704 memcpy(retptr, self._ptr, self._len); 705 retptr += self._len; 706 } 707 } 708 709 return ret; 710 } 711 }