/ CFBurstTrie.c
CFBurstTrie.c
1 /* 2 * Copyright (c) 2015 Apple Inc. All rights reserved. 3 * 4 * @APPLE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. Please obtain a copy of the License at 10 * http://www.opensource.apple.com/apsl/ and read it before using this 11 * file. 12 * 13 * The Original Code and all software distributed under the License are 14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 18 * Please see the License for the specific language governing rights and 19 * limitations under the License. 20 * 21 * @APPLE_LICENSE_HEADER_END@ 22 */ 23 24 /* CFBurstTrie.c 25 Copyright (c) 2008-2014, Apple Inc. All rights reserved. 26 Responsibility: Jennifer Moore 27 */ 28 29 #include "CFInternal.h" 30 #include "CFBurstTrie.h" 31 #include "CFByteOrder.h" 32 #include "CFNumber.h" 33 #include <stdio.h> 34 #include <string.h> 35 #include <stdlib.h> 36 #include <fcntl.h> 37 #include <sys/stat.h> 38 #include <limits.h> 39 #if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_EMBEDDED_MINI || DEPLOYMENT_TARGET_LINUX 40 #include <unistd.h> 41 #include <sys/param.h> 42 #include <sys/mman.h> 43 #endif 44 45 #include <errno.h> 46 #include <assert.h> 47 48 #if DEPLOYMENT_TARGET_WINDOWS 49 #define open _NS_open 50 #define statinfo _stat 51 #define stat(x,y) _NS_stat(x,y) 52 #define __builtin_memcmp(x, y, z) memcmp(x, y, z) 53 #define __builtin_popcountll(x) popcountll(x) 54 #define bzero(dst, size) ZeroMemory(dst, size) 55 #define S_IWUSR 0 56 #define S_IRUSR 0 57 58 static int pwrite(int fd, const void *buf, size_t nbyte, off_t offset) { 59 // Get where we are 60 long pos = _tell(fd); 61 62 // Move to new offset 63 _lseek(fd, offset, SEEK_SET); 64 65 // Write data 66 int res = _write(fd, buf, nbyte); 67 68 // Return to previous offset 69 _lseek(fd, pos, SEEK_SET); 70 71 return res; 72 } 73 74 #else 75 #define statinfo stat 76 #endif 77 78 #if 0 79 #pragma mark Types and Utilities 80 #endif 81 82 #define MAX_STRING_ALLOCATION_SIZE 342 83 #define MAX_STRING_SIZE 1024 84 #define MAX_KEY_LENGTH MAX_STRING_SIZE * 4 85 #define CHARACTER_SET_SIZE 256 86 #define MAX_LIST_SIZE 256 // 64 87 #define MAX_BITMAP_SIZE 200 88 #define MAX_BUFFER_SIZE (4096<<2) 89 90 #define NextTrie_GetPtr(p) (p & ((~(uintptr_t)0)-3)) 91 #define NextTrie_GetKind(p) (p & 3) 92 #define NextTrie_SetKind(p, kind) (p |= (3&kind)) 93 94 #define DiskNextTrie_GetPtr(map,offset) (((uintptr_t)map) + (uintptr_t)(offset & ((~(uintptr_t)0)-3))) 95 #define DiskNextTrie_GetKind(p) (p & 3) 96 #define DiskNextTrie_SetKind(p, kind) (p |= (3&kind)) 97 98 // Use this macro to avoid forgetting to check the pointer before assigning value to it. 99 #define SetPayload(pointer, value) do { if (pointer) *pointer = value; } while (0) 100 101 enum { Nothing = 0, TrieKind = 1, ListKind = 2, CompactTrieKind = 3 }; 102 typedef enum { FailedInsert = 0, NewTerm = 1, ExistingTerm = 2 } CFBTInsertCode; 103 104 #pragma pack (1) 105 typedef uintptr_t NextTrie; 106 107 typedef struct _TrieLevel { 108 NextTrie slots[CHARACTER_SET_SIZE]; 109 uint32_t weight; 110 uint32_t payload; 111 } TrieLevel; 112 typedef TrieLevel *TrieLevelRef; 113 114 typedef struct _MapTrieLevel { 115 uint32_t slots[CHARACTER_SET_SIZE]; 116 uint32_t payload; 117 } MapTrieLevel; 118 typedef MapTrieLevel *MapTrieLevelRef; 119 120 typedef struct _CompactMapTrieLevel { 121 uint64_t bitmap[CHARACTER_SET_SIZE / 64]; 122 uint32_t payload; 123 uint32_t slots[]; 124 } CompactMapTrieLevel; 125 typedef CompactMapTrieLevel *CompactMapTrieLevelRef; 126 127 typedef struct _ListNode { 128 struct _ListNode *next; 129 uint32_t weight; 130 uint32_t payload; 131 uint16_t length; 132 UInt8 string[]; 133 }* ListNodeRef; 134 135 typedef struct _Page { 136 uint32_t length; 137 char data[]; 138 } Page; 139 140 typedef struct _PageEntryPacked { 141 uint8_t pfxLen; 142 uint16_t strlen; 143 uint32_t payload; 144 UInt8 string[]; 145 } PageEntryPacked; 146 147 typedef struct _PageEntry { 148 uint16_t strlen; 149 uint32_t payload; 150 UInt8 string[]; 151 } PageEntry; 152 153 typedef struct _TrieHeader { 154 uint32_t signature; 155 uint32_t rootOffset; 156 uint32_t count; 157 uint32_t size; 158 uint32_t flags; 159 uint64_t reserved[16]; 160 } TrieHeader; 161 162 typedef struct _TrieCursor { 163 uint64_t signature; 164 uint64_t counter; 165 NextTrie next; 166 uint32_t keylen; 167 uint32_t prefixlen; 168 const uint8_t *prefix; 169 uint8_t key[MAX_KEY_LENGTH]; 170 } TrieCursor; 171 172 typedef struct _MapCursor { 173 uint64_t signature; 174 TrieHeader *header; 175 uint32_t next; 176 uint32_t prefixlen; 177 uint32_t keylen; 178 const uint8_t *prefix; 179 uint8_t key[MAX_STRING_SIZE*4]; 180 } MapCursor; 181 182 typedef struct _CompactMapCursor { 183 uint32_t next; 184 uint32_t entryOffsetInPage; 185 uint32_t offsetInEntry; 186 uint32_t payload; 187 // On a page, the first entry could has 0 strlen. So we need this variable to tell us whether 188 // the cursor is merely pointing at the beginning of the page, or the first entry. 189 // For example, if the trie contains "ab" and "abc", where "a" is stored on an array level, 190 // while "b" and "bc" are stored on a page level. If we creat a cursor for string "a", this cursor 191 // will point at the beginning of the page, but not at any particular key. The both entryOffsetInPage and 192 // offsetInEntry fields of the cursor are set to 0 in this case. Now if we add "a" to the 193 // trie. the page level will actually contains three entries. The first entry corresponds to string "a". 194 // That entry has 0 strlen value. If we creat a cursor for string "a" again, this cursor will 195 // point at the first entry on the page. But the entryOffsetInPage and offsetInEntry fields are still 196 // set to 0s. So we need an additional variable to make distinction between these two situations. 197 BOOL isOnPage; 198 } CompactMapCursor; 199 typedef struct _CompactMapCursor *MapCursorRef; 200 201 enum { 202 _kCFBurstTrieCursorTrieType = 0, 203 _kCFBurstTrieCursorMapType 204 }; 205 206 typedef struct _CFBurstTrieCursor { 207 CompactMapCursor mapCursor; 208 CFIndex cursorType; 209 CFBurstTrieRef trie; 210 } _CFBurstTrieCursor; 211 212 // ** Legacy 213 typedef struct _DiskTrieLevel { 214 uint32_t slots[CHARACTER_SET_SIZE]; 215 uint32_t weight; 216 uint32_t payload; 217 } DiskTrieLevel; 218 typedef DiskTrieLevel *DiskTrieLevelRef; 219 220 typedef struct _CompactDiskTrieLevel { 221 uint64_t bitmap[CHARACTER_SET_SIZE / 64]; // CHARACTER_SET_SIZE / 64bits per word 222 uint32_t weight; 223 uint32_t payload; 224 uint32_t slots[]; 225 } CompactDiskTrieLevel; 226 typedef CompactDiskTrieLevel *CompactDiskTrieLevelRef; 227 228 typedef struct _StringPage { 229 uint32_t length; 230 char data[]; 231 } StringPage; 232 233 typedef struct _StringPageEntryPacked { 234 uint8_t pfxLen; 235 uint16_t strlen; // make uint8_t if possible 236 uint32_t payload; 237 char string[]; 238 } StringPageEntryPacked; 239 240 typedef struct _StringPageEntry { 241 uint16_t strlen; // make uint8_t if possible 242 uint32_t payload; 243 char string[]; 244 } StringPageEntry; 245 246 typedef struct _fileHeader { 247 uint32_t signature; 248 uint32_t rootOffset; 249 uint32_t count; 250 uint32_t size; 251 uint32_t flags; 252 } fileHeader; 253 // ** 254 #pragma pack() 255 256 struct _CFBurstTrie { 257 union { 258 TrieLevel root; 259 DiskTrieLevel diskRoot; 260 MapTrieLevel maproot; 261 }; 262 char *mapBase; 263 uint32_t mapSize; 264 uint32_t mapOffset; 265 uint32_t cflags; 266 uint32_t count; 267 uint32_t containerSize; 268 int retain; 269 #if DEPLOYMENT_TARGET_WINDOWS 270 HANDLE mapHandle; 271 HANDLE mappedFileHandle; 272 #endif 273 }; 274 275 #if 0 276 #pragma mark - 277 #pragma mark Forward declarations 278 #endif 279 280 typedef struct _TraverseContext { 281 void *context; 282 void (*callback)(void*, const UInt8*, uint32_t, uint32_t); 283 } TraverseContext; 284 285 static bool foundKey(void *context, const uint8_t *key, uint32_t payload, bool exact) 286 { 287 if (context != NULL) { 288 TraverseContext *ctx = (TraverseContext *)context; 289 if (ctx->context && ctx->callback) { 290 ctx->callback(ctx->context, key, 1, payload); 291 } 292 } 293 return false; 294 } 295 296 void CFBurstTrieTraverseWithCursor(CFBurstTrieRef trie, const uint8_t *prefix, uint32_t prefixLen, void **cursor, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)); 297 298 static CFBTInsertCode addCFBurstTrieLevel(CFBurstTrieRef trie, TrieLevelRef root, const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload); 299 300 static void findCFBurstTrieLevel(CFBurstTrieRef trie, TrieCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool)); 301 static void findCFBurstTrieMappedLevel(CFBurstTrieRef trie, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool)); 302 static void traverseCFBurstTrieLevel(CFBurstTrieRef trie, TrieLevelRef root, TrieCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)); 303 static void traverseCFBurstTrieMappedLevel(CFBurstTrieRef trie, MapTrieLevelRef root, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)); 304 static void traverseCFBurstTrieCompactMappedLevel(CFBurstTrieRef trie, CompactMapTrieLevelRef root, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)); 305 static void traverseCFBurstTrieWithCursor(CFBurstTrieRef trie, const uint8_t *prefix, uint32_t prefixLen, void **cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)); 306 307 static size_t serializeCFBurstTrie(CFBurstTrieRef trie, size_t start_offset, int fd); 308 309 static Boolean burstTrieMappedFind(DiskTrieLevelRef trie, char *map, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix); 310 static Boolean burstTrieMappedPageFind(StringPage *page, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix); 311 static Boolean burstTrieCompactTrieMappedFind(CompactDiskTrieLevelRef trie, char *map, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix); 312 313 static void destroyCFBurstTrie(CFBurstTrieRef trie); 314 static void finalizeCFBurstTrie(TrieLevelRef trie); 315 static void finalizeCFBurstTrieList(ListNodeRef node); 316 317 static int nodeWeightCompare(const void *a, const void *b); 318 static int nodeStringCompare(const void *a, const void *b); 319 320 static bool foundKey(void *context, const uint8_t *key, uint32_t payload, bool exact); 321 static bool containsKey(void *context, const uint8_t *key, uint32_t payload, bool exact); 322 323 static CFIndex burstTrieConvertCharactersToUTF8(UniChar *chars, CFIndex numChars, UInt8 *buffer); 324 325 static Boolean advanceMapCursor(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length); 326 static Boolean getMapCursorPayload(CFBurstTrieRef trie, const CompactMapCursor *cursor, uint32_t *payload); 327 static void copyMapCursor(const CompactMapCursor *source, CompactMapCursor* destination); 328 static Boolean areMapCursorsEqual(const CompactMapCursor *lhs, const CompactMapCursor *rhs); 329 static void traverseFromMapCursor(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback); 330 static Boolean getMapCursorPayloadFromPackedPageEntry(PageEntryPacked *entry, const CompactMapCursor *cursor, uint32_t *payload); 331 static Boolean getMapCursorPayloadFromPageEntry(PageEntry *entry, const CompactMapCursor *cursor, uint32_t *payload); 332 333 CFBurstTrieRef CFBurstTrieCreateWithOptions(CFDictionaryRef options) { 334 CFBurstTrieRef trie = NULL; 335 trie = (CFBurstTrieRef) calloc(1, sizeof(struct _CFBurstTrie)); 336 trie->containerSize = MAX_LIST_SIZE; 337 338 CFNumberRef valueAsCFNumber; 339 if (CFDictionaryGetValueIfPresent(options, kCFBurstTrieCreationOptionNameContainerSize, (const void **)&valueAsCFNumber)) { 340 int value; 341 CFNumberGetValue(valueAsCFNumber, kCFNumberIntType, &value); 342 trie->containerSize = value > 2 && value < 4096 ? value : MAX_LIST_SIZE; 343 } 344 trie->retain = 1; 345 return trie; 346 } 347 348 CFBurstTrieRef CFBurstTrieCreate() { 349 CFBurstTrieRef trie = NULL; 350 int listSize = MAX_LIST_SIZE; 351 CFNumberRef value = CFNumberCreate(kCFAllocatorDefault, kCFNumberIntType, &listSize); 352 CFMutableDictionaryRef options = CFDictionaryCreateMutable(kCFAllocatorDefault, 1, NULL, NULL); 353 CFDictionarySetValue(options, kCFBurstTrieCreationOptionNameContainerSize, value); 354 trie = CFBurstTrieCreateWithOptions(options); 355 CFRelease(value); 356 CFRelease(options); 357 return trie; 358 } 359 360 CFBurstTrieRef CFBurstTrieCreateFromFile(CFStringRef path) { 361 struct statinfo sb; 362 char filename[PATH_MAX]; 363 int fd; 364 365 /* Check valid path name */ 366 if (!CFStringGetCString(path, filename, PATH_MAX, kCFStringEncodingUTF8)) return NULL; 367 368 /* Check if file exists */ 369 if (stat(filename, &sb) != 0) return NULL; 370 371 /* Check if file can be opened */ 372 if ((fd=open(filename, CF_OPENFLGS|O_RDONLY)) < 0) return NULL; 373 374 #if DEPLOYMENT_TARGET_WINDOWS 375 HANDLE mappedFileHandle = (HANDLE)_get_osfhandle(fd); 376 if (!mappedFileHandle) return NULL; 377 378 HANDLE mapHandle = CreateFileMapping(mappedFileHandle, NULL, PAGE_READONLY, 0, 0, NULL); 379 if (!mapHandle) return NULL; 380 381 char *map = (char *)MapViewOfFile(mapHandle, FILE_MAP_READ, 0, 0, sb.st_size); 382 if (!map) return NULL; 383 #else 384 char *map = mmap(0, sb.st_size, PROT_READ, MAP_FILE|MAP_SHARED, fd, 0); 385 #endif 386 387 CFBurstTrieRef trie = NULL; 388 TrieHeader *header = (TrieHeader *)map; 389 390 if (((uint32_t*)map)[0] == 0xbabeface) { 391 trie = (CFBurstTrieRef) calloc(1, sizeof(struct _CFBurstTrie)); 392 trie->mapBase = map; 393 trie->mapSize = CFSwapInt32LittleToHost(sb.st_size); 394 trie->mapOffset = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->rootOffset); 395 trie->cflags = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->flags); 396 trie->count = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->count); 397 trie->retain = 1; 398 #if DEPLOYMENT_TARGET_WINDOWS 399 trie->mappedFileHandle = mappedFileHandle; 400 trie->mapHandle = mapHandle; 401 #else 402 // On Windows, the file being mapped must stay open as long as the map exists. Don't close it early. Other platforms close it here. 403 close(fd); 404 #endif 405 } else if (header->signature == 0xcafebabe || header->signature == 0x0ddba11) { 406 trie = (CFBurstTrieRef) calloc(1, sizeof(struct _CFBurstTrie)); 407 trie->mapBase = map; 408 trie->mapSize = CFSwapInt32LittleToHost(sb.st_size); 409 trie->cflags = CFSwapInt32LittleToHost(header->flags); 410 trie->count = CFSwapInt32LittleToHost(header->count); 411 trie->retain = 1; 412 #if DEPLOYMENT_TARGET_WINDOWS 413 trie->mappedFileHandle = mappedFileHandle; 414 trie->mapHandle = mapHandle; 415 #else 416 // On Windows, the file being mapped must stay open as long as the map exists. Don't close it early. Other platforms close it here. 417 close(fd); 418 #endif 419 } else { 420 close(fd); 421 } 422 return trie; 423 } 424 425 CFBurstTrieRef CFBurstTrieCreateFromMapBytes(char *mapBase) { 426 CFBurstTrieRef trie = NULL; 427 TrieHeader *header = (TrieHeader *)mapBase; 428 429 if (mapBase && ((uint32_t*)mapBase)[0] == 0xbabeface) { 430 trie = (CFBurstTrieRef) malloc(sizeof(struct _CFBurstTrie)); 431 trie->mapBase = mapBase; 432 trie->mapSize = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->size); 433 trie->mapOffset = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->rootOffset); 434 trie->cflags = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->flags); 435 trie->count = CFSwapInt32LittleToHost(((fileHeader*)trie->mapBase)->count); 436 trie->retain = 1; 437 } else if (mapBase && (header->signature == 0xcafebabe || header->signature == 0x0ddba11)) { 438 trie = (CFBurstTrieRef) malloc(sizeof(struct _CFBurstTrie)); 439 trie->mapBase = mapBase; 440 trie->mapSize = CFSwapInt32LittleToHost(header->size); 441 trie->cflags = CFSwapInt32LittleToHost(header->flags); 442 trie->count = CFSwapInt32LittleToHost(header->count); 443 trie->retain = 1; 444 } 445 return trie; 446 } 447 448 Boolean CFBurstTrieInsert(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, CFIndex payload) { 449 return CFBurstTrieAddWithWeight(trie, term, termRange, 1, (uint32_t)payload); 450 } 451 452 Boolean CFBurstTrieAdd(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, uint32_t payload) { 453 return CFBurstTrieAddWithWeight(trie, term, termRange, 1, payload); 454 } 455 456 Boolean CFBurstTrieInsertCharacters(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, CFIndex payload) { 457 return CFBurstTrieAddCharactersWithWeight(trie, chars, numChars, 1, (uint32_t)payload); 458 } 459 460 Boolean CFBurstTrieAddCharacters(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, uint32_t payload) { 461 return CFBurstTrieAddCharactersWithWeight(trie, chars, numChars, 1, payload); 462 } 463 464 Boolean CFBurstTrieInsertUTF8String(CFBurstTrieRef trie, UInt8 *chars, CFIndex numChars, CFIndex payload) { 465 return CFBurstTrieAddUTF8StringWithWeight(trie, chars, numChars, 1, (uint32_t)payload); 466 } 467 468 Boolean CFBurstTrieAddUTF8String(CFBurstTrieRef trie, UInt8 *chars, CFIndex numChars, uint32_t payload) { 469 return CFBurstTrieAddUTF8StringWithWeight(trie, chars, numChars, 1, payload); 470 } 471 472 Boolean CFBurstTrieInsertWithWeight(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, CFIndex weight, CFIndex payload) { 473 return CFBurstTrieAddWithWeight(trie, term, termRange, weight, (uint32_t)payload); 474 } 475 476 Boolean CFBurstTrieAddWithWeight(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, uint32_t weight, uint32_t payload) { 477 Boolean success = false; 478 CFIndex size = MAX_STRING_ALLOCATION_SIZE; 479 CFIndex bytesize = termRange.length * 4; //** 4-byte max character size 480 if (!trie->mapBase && termRange.length < MAX_STRING_SIZE && payload > 0) { 481 CFIndex length; 482 UInt8 buffer[MAX_STRING_ALLOCATION_SIZE + 1]; 483 UInt8 *key = buffer; 484 if (bytesize >= size) { 485 size = bytesize; 486 key = (UInt8 *) malloc(sizeof(UInt8) * size + 1); 487 } 488 CFStringGetBytes(term, termRange, kCFStringEncodingUTF8, (UInt8)'-', (Boolean)0, key, size, &length); 489 key[length] = 0; 490 491 success = CFBurstTrieAddUTF8StringWithWeight(trie, key, length, weight, payload); 492 if (buffer != key) free(key); 493 } 494 return success; 495 } 496 497 Boolean CFBurstTrieInsertCharactersWithWeight(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, CFIndex weight, CFIndex payload) { 498 return CFBurstTrieAddCharactersWithWeight(trie, chars, numChars, weight, (uint32_t)payload); 499 } 500 501 Boolean CFBurstTrieAddCharactersWithWeight(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, uint32_t weight, uint32_t payload) { 502 Boolean success = false; 503 CFIndex size = MAX_STRING_ALLOCATION_SIZE; 504 CFIndex bytesize = numChars * 4; //** 4-byte max character size 505 if (!trie->mapBase && numChars < MAX_STRING_SIZE && payload > 0) { 506 CFIndex length; 507 UInt8 buffer[MAX_STRING_ALLOCATION_SIZE + 1]; 508 UInt8 *key = buffer; 509 if (bytesize >= size) { 510 size = bytesize; 511 key = (UInt8 *) malloc(sizeof(UInt8) * size + 1); 512 } 513 length = burstTrieConvertCharactersToUTF8(chars, numChars, key); 514 key[length] = 0; 515 516 success = CFBurstTrieAddUTF8StringWithWeight(trie, key, length, weight, payload); 517 if (buffer != key) free(key); 518 } 519 return success; 520 } 521 522 Boolean CFBurstTrieInsertUTF8StringWithWeight(CFBurstTrieRef trie, UInt8 *chars, CFIndex numChars, CFIndex weight, CFIndex payload) { 523 return CFBurstTrieAddUTF8StringWithWeight(trie, chars, numChars, weight, (uint32_t)payload); 524 } 525 526 Boolean CFBurstTrieAddUTF8StringWithWeight(CFBurstTrieRef trie, UInt8 *chars, CFIndex numChars, uint32_t weight, uint32_t payload) { 527 CFBTInsertCode code = FailedInsert; 528 529 if (!trie->mapBase && numChars < MAX_STRING_SIZE*4 && payload > 0) { 530 code = addCFBurstTrieLevel(trie, &trie->root, chars, numChars, weight, payload); 531 if (code == NewTerm) trie->count++; 532 } 533 return code > FailedInsert; 534 } 535 536 Boolean CFBurstTrieFind(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, CFIndex *payload) { 537 uint32_t p; 538 if (CFBurstTrieContains(trie, term, termRange, &p)) { 539 SetPayload(payload, p); 540 return true; 541 } 542 return false; 543 } 544 545 Boolean CFBurstTrieContains(CFBurstTrieRef trie, CFStringRef term, CFRange termRange, uint32_t *payload) { 546 Boolean success = false; 547 CFIndex size = MAX_STRING_ALLOCATION_SIZE; 548 CFIndex bytesize = termRange.length * 4; //** 4-byte max character size 549 if (termRange.length < MAX_STRING_SIZE) { 550 CFIndex length; 551 UInt8 buffer[MAX_STRING_ALLOCATION_SIZE+1]; 552 UInt8 *key = buffer; 553 if (bytesize >= size) { 554 size = bytesize; 555 key = (UInt8 *) malloc(sizeof(UInt8) * size + 1); 556 } 557 CFStringGetBytes(term, termRange, kCFStringEncodingUTF8, (UInt8)'-', (Boolean)0, key, size, &length); 558 key[length] = 0; 559 560 success = CFBurstTrieContainsUTF8String(trie, key, length, payload); 561 if (buffer != key) free(key); 562 } 563 return success; 564 } 565 566 Boolean CFBurstTrieFindCharacters(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, CFIndex *payload) { 567 uint32_t p; 568 if (CFBurstTrieContainsCharacters(trie, chars, numChars, &p)) { 569 SetPayload(payload, p); 570 return true; 571 } 572 return false; 573 } 574 575 Boolean CFBurstTrieContainsCharacters(CFBurstTrieRef trie, UniChar *chars, CFIndex numChars, uint32_t *payload) { 576 Boolean success = false; 577 CFIndex size = MAX_STRING_ALLOCATION_SIZE; 578 CFIndex bytesize = numChars * 4; //** 4-byte max character size 579 if (numChars < MAX_STRING_SIZE) { 580 CFIndex length; 581 UInt8 buffer[MAX_STRING_ALLOCATION_SIZE + 1]; 582 UInt8 *key = buffer; 583 if (bytesize >= size) { 584 size = bytesize; 585 key = (UInt8 *) malloc(sizeof(UInt8) * size + 1); 586 } 587 length = burstTrieConvertCharactersToUTF8(chars, numChars, key); 588 key[length] = 0; 589 590 success = CFBurstTrieContainsUTF8String(trie, key, length, payload); 591 if (buffer != key) free(key); 592 } 593 return success; 594 } 595 596 Boolean CFBurstTrieFindUTF8String(CFBurstTrieRef trie, UInt8 *key, CFIndex length, CFIndex *payload) { 597 uint32_t p; 598 if (CFBurstTrieContainsUTF8String(trie, key, length, &p)) { 599 SetPayload(payload, p); 600 return true; 601 } 602 return false; 603 } 604 605 Boolean CFBurstTrieContainsUTF8String(CFBurstTrieRef trie, UInt8 *key, CFIndex length, uint32_t *payload) { 606 Boolean success = false; 607 if (length < MAX_STRING_SIZE) { 608 if (trie->mapBase && ((fileHeader *)trie->mapBase)->signature == 0xbabeface) { 609 bool prefix = (trie->cflags & kCFBurstTriePrefixCompression); 610 success = burstTrieMappedFind((DiskTrieLevelRef)(trie->mapBase+CFSwapInt32LittleToHost((((uint32_t*)trie->mapBase)[1]))), trie->mapBase, key, length, payload, prefix); 611 } else if (trie->mapBase && trie->cflags & (kCFBurstTriePrefixCompression | kCFBurstTrieSortByKey)) { 612 _CFBurstTrieCursor cursor; 613 if (!CFBurstTrieSetCursorForBytes(trie, &cursor, key, length)) 614 return FALSE; 615 return CFBurstTrieCursorGetPayload(&cursor, payload); 616 } else { 617 uint32_t found = 0; 618 void *cursor = 0; 619 traverseCFBurstTrieWithCursor(trie, key, length, &cursor, true, &found, containsKey); 620 if (found) SetPayload(payload, found); 621 success = found > 0; 622 } 623 } 624 return success; 625 } 626 627 Boolean CFBurstTrieSerialize(CFBurstTrieRef trie, CFStringRef path, CFBurstTrieOpts opts) { 628 Boolean success = false; 629 if (trie->mapBase) { 630 return success; 631 } else { 632 int fd; 633 char filename[PATH_MAX]; 634 635 /* Check valid path name */ 636 if (!CFStringGetCString(path, filename, PATH_MAX, kCFStringEncodingUTF8)) return success; 637 638 /* Check if file can be opened */ 639 if ((fd=open(filename, CF_OPENFLGS|O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR)) < 0) return success; 640 641 if (CFBurstTrieSerializeWithFileDescriptor(trie, fd, opts)) success = true; 642 643 close(fd); 644 } 645 return success; 646 } 647 648 Boolean CFBurstTrieSerializeWithFileDescriptor(CFBurstTrieRef trie, int fd, CFBurstTrieOpts opts) { 649 Boolean success = false; 650 if (!trie->mapBase && fd >= 0) { 651 off_t start_offset = lseek(fd, 0, SEEK_END); 652 653 trie->cflags = opts; 654 trie->mapSize = serializeCFBurstTrie(trie, start_offset, fd); 655 656 #if DEPLOYMENT_TARGET_WINDOWS 657 HANDLE mappedFileHandle = (HANDLE)_get_osfhandle(fd); 658 // We need to make sure we have our own handle to keep this file open as long as the mmap lasts 659 DuplicateHandle(GetCurrentProcess(), mappedFileHandle, GetCurrentProcess(), &mappedFileHandle, 0, 0, DUPLICATE_SAME_ACCESS); 660 HANDLE mapHandle = CreateFileMapping(mappedFileHandle, NULL, PAGE_READONLY, 0, 0, NULL); 661 if (!mapHandle) return NULL; 662 char *map = (char *)MapViewOfFile(mapHandle, FILE_MAP_READ, 0, start_offset, trie->mapSize); 663 if (!map) return NULL; 664 trie->mapBase = map; 665 trie->mapHandle = mapHandle; 666 trie->mappedFileHandle = mappedFileHandle; 667 #else 668 trie->mapBase = mmap(0, trie->mapSize, PROT_READ, MAP_FILE|MAP_SHARED, fd, start_offset); 669 #endif 670 success = true; 671 } 672 673 return success; 674 } 675 676 void CFBurstTrieTraverse(CFBurstTrieRef trie, void *ctx, void (*callback)(void*, const UInt8*, uint32_t, uint32_t)) { 677 TrieHeader *header = (TrieHeader *)trie->mapBase; 678 if (!trie->mapBase || (header->signature == 0xcafebabe || header->signature == 0x0ddba11)) { 679 void *cursor = 0; 680 TraverseContext context; 681 context.context = ctx; 682 context.callback = callback; 683 traverseCFBurstTrieWithCursor(trie, (const uint8_t *)"", 0, &cursor, false, &context, foundKey); 684 } 685 } 686 687 688 void CFBurstTrieTraverseWithCursor(CFBurstTrieRef trie, const uint8_t *prefix, uint32_t prefixLen, void **cursor, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)) 689 { 690 traverseCFBurstTrieWithCursor(trie, prefix, prefixLen, cursor, false, ctx, callback); 691 } 692 693 void CFBurstTriePrint(CFBurstTrieRef trie) { 694 695 } 696 697 CFIndex CFBurstTrieGetCount(CFBurstTrieRef trie) { 698 return trie->count; 699 } 700 701 CFBurstTrieRef CFBurstTrieRetain(CFBurstTrieRef trie) { 702 trie->retain++; 703 return trie; 704 } 705 706 void CFBurstTrieRelease(CFBurstTrieRef trie) { 707 trie->retain--; 708 if (trie->retain == 0) destroyCFBurstTrie(trie); 709 return; 710 } 711 712 Boolean CFBurstTrieSetCursorForBytes(CFBurstTrieRef trie, CFBurstTrieCursorRef cursor, const UInt8* bytes, CFIndex length) 713 { 714 if (!trie->mapBase || !(trie->cflags & (kCFBurstTriePrefixCompression | kCFBurstTrieSortByKey))) { 715 //fprintf(stderr, "CFBurstTrieCreateCursorForBytes() only support file based trie in prefix compression format.\n"); 716 return FALSE; 717 } 718 719 TrieHeader *header = (TrieHeader*)trie->mapBase; 720 if (length < 0 || !trie) 721 return FALSE; 722 723 cursor->trie = trie; 724 if (trie->mapBase) { 725 cursor->cursorType = _kCFBurstTrieCursorMapType; 726 cursor->mapCursor.next = header->rootOffset; 727 cursor->mapCursor.isOnPage = FALSE; 728 cursor->mapCursor.entryOffsetInPage = 0; 729 cursor->mapCursor.offsetInEntry = 0; 730 cursor->mapCursor.payload = 0; 731 } else 732 assert(false); 733 734 if (!bytes || length == 0) 735 return TRUE; 736 737 return CFBurstTrieCursorAdvanceForBytes(cursor, bytes, length); 738 } 739 740 741 CFBurstTrieCursorRef CFBurstTrieCreateCursorForBytes(CFBurstTrieRef trie, const UInt8* bytes, CFIndex length) 742 { 743 CFBurstTrieCursorRef cursor = (CFBurstTrieCursorRef)calloc(sizeof(_CFBurstTrieCursor), 1); 744 if (!CFBurstTrieSetCursorForBytes(trie, cursor, bytes, length)) { 745 CFBurstTrieCursorRelease(cursor); 746 return NULL; 747 } 748 return cursor; 749 } 750 751 CFBurstTrieCursorRef CFBurstTrieCursorCreateByCopy(CFBurstTrieCursorRef cursor) 752 { 753 if (!cursor) 754 return NULL; 755 756 CFBurstTrieCursorRef newCursor = (CFBurstTrieCursorRef)calloc(sizeof(_CFBurstTrieCursor), 1); 757 switch (cursor->cursorType) { 758 case _kCFBurstTrieCursorMapType: 759 copyMapCursor(&cursor->mapCursor, &newCursor->mapCursor); 760 break; 761 case _kCFBurstTrieCursorTrieType: 762 assert(false); 763 break; 764 } 765 newCursor->cursorType = cursor->cursorType; 766 newCursor->trie = cursor->trie; 767 return newCursor; 768 } 769 770 Boolean CFBurstTrieCursorIsEqual(CFBurstTrieCursorRef lhs, CFBurstTrieCursorRef rhs) 771 { 772 if (lhs->trie != rhs->trie || lhs->cursorType != rhs->cursorType) 773 return FALSE; 774 775 if (lhs->cursorType == _kCFBurstTrieCursorMapType) 776 return areMapCursorsEqual(&lhs->mapCursor, &rhs->mapCursor); 777 else 778 return FALSE; 779 } 780 781 Boolean CFBurstTrieCursorAdvanceForBytes(CFBurstTrieCursorRef cursor, const UInt8* bytes, CFIndex length) 782 { 783 switch (cursor->cursorType) { 784 case _kCFBurstTrieCursorMapType: { 785 CompactMapCursor tempCursor; 786 copyMapCursor(&cursor->mapCursor, &tempCursor); 787 if (advanceMapCursor(cursor->trie, (CompactMapCursor*)&cursor->mapCursor, bytes, length)) 788 return TRUE; 789 else { 790 copyMapCursor(&tempCursor, &cursor->mapCursor); 791 return FALSE; 792 } 793 } 794 case _kCFBurstTrieCursorTrieType: 795 return FALSE; 796 } 797 return FALSE; 798 } 799 800 Boolean CFBurstTrieCursorGetPayload(CFBurstTrieCursorRef cursor, uint32_t *payload) 801 { 802 switch (cursor->cursorType) { 803 case _kCFBurstTrieCursorMapType: 804 return getMapCursorPayload(cursor->trie, (CompactMapCursor*)&cursor->mapCursor, payload); 805 case _kCFBurstTrieCursorTrieType: 806 return FALSE; 807 } 808 return FALSE; 809 } 810 811 void CFBurstTrieCursorRelease(CFBurstTrieCursorRef cursor) 812 { 813 if (!cursor) 814 return; 815 free(cursor); 816 } 817 818 void CFBurstTrieTraverseFromCursor(CFBurstTrieCursorRef cursor, void *ctx, CFBurstTrieTraversalCallback callback) 819 { 820 if (!cursor) 821 return; 822 823 UInt8 *bytes = (UInt8*)calloc(1, MAX_KEY_LENGTH); 824 uint32_t capacity = MAX_KEY_LENGTH; 825 uint32_t length = 0; 826 Boolean stop = FALSE; 827 switch (cursor->cursorType) { 828 case _kCFBurstTrieCursorMapType: { 829 CompactMapCursor tempCursor; 830 copyMapCursor(&cursor->mapCursor, &tempCursor); 831 traverseFromMapCursor(cursor->trie, &tempCursor, bytes, capacity,length, &stop, ctx, callback); 832 break; 833 } 834 case _kCFBurstTrieCursorTrieType: 835 break; 836 } 837 free(bytes); 838 } 839 840 #if 0 841 #pragma mark - 842 #pragma mark Insertion 843 #endif 844 845 static ListNodeRef makeCFBurstTrieListNode(const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload) { 846 ListNodeRef node = (ListNodeRef) calloc(1, sizeof(*node) + keylen + 1); 847 memcpy(node->string, key, keylen); 848 node->string[keylen] = 0; 849 node->next = 0; 850 node->length = keylen; 851 node->weight = weight; 852 node->payload = payload; 853 return node; 854 } 855 856 static void addCFBurstTrieBurstLevel(CFBurstTrieRef trie, TrieLevelRef root, const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload) { 857 if (keylen) { 858 NextTrie next = root->slots[*key]; 859 ListNodeRef newNode = makeCFBurstTrieListNode(key+1, keylen-1, weight, payload); 860 newNode->weight = weight; 861 newNode->next = (ListNodeRef) NextTrie_GetPtr(next); 862 next = (uintptr_t) newNode; 863 NextTrie_SetKind(next, ListKind); 864 root->slots[*key] = next; 865 } else { 866 // ** Handle payload. 867 root->weight = weight; 868 root->payload = payload; 869 } 870 } 871 872 static TrieLevelRef burstCFBurstTrieLevel(CFBurstTrieRef trie, ListNodeRef list, uint32_t listCount) { 873 TrieLevelRef newLevel = (TrieLevelRef) calloc(1, sizeof(struct _TrieLevel)); 874 while (list) { 875 addCFBurstTrieBurstLevel(trie, newLevel, list->string, list->length, list->weight, list->payload); 876 ListNodeRef temp = list; 877 list = list->next; 878 free(temp); 879 } 880 return newLevel; 881 } 882 883 static CFBTInsertCode addCFBurstTrieListNode(CFBurstTrieRef trie, ListNodeRef list, const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload, uint32_t *listCount) 884 { 885 CFBTInsertCode code = FailedInsert; 886 uint32_t count = 1; 887 888 ListNodeRef last = list; 889 while (list) { 890 if (list->length == keylen && memcmp(key, list->string, keylen) == 0) { 891 list->weight += weight; 892 list->payload = payload; 893 code = ExistingTerm; 894 break; 895 } else { 896 count++; 897 last = list; 898 list = list->next; 899 } 900 } 901 902 if (!list) { 903 last->next = makeCFBurstTrieListNode(key, keylen, weight, payload); 904 code = NewTerm; 905 } 906 907 *listCount = count; 908 return code; 909 } 910 911 static CFBTInsertCode addCFBurstTrieLevel(CFBurstTrieRef trie, TrieLevelRef root, const uint8_t *key, uint32_t keylen, uint32_t weight, uint32_t payload) 912 { 913 CFBTInsertCode code = FailedInsert; 914 if (keylen) { 915 NextTrie next = root->slots[*key]; 916 if (NextTrie_GetKind(next) == TrieKind) { 917 TrieLevelRef nextLevel = (TrieLevelRef) NextTrie_GetPtr(next); 918 code = addCFBurstTrieLevel(trie, nextLevel, key+1, keylen-1, weight, payload); 919 } else { 920 if (NextTrie_GetKind(next) == ListKind) { 921 uint32_t listCount; 922 ListNodeRef listNode = (ListNodeRef) NextTrie_GetPtr(next); 923 code = addCFBurstTrieListNode(trie, listNode, key+1, keylen-1, weight, payload, &listCount); 924 if (listCount > trie->containerSize) { 925 next = (uintptr_t) burstCFBurstTrieLevel(trie, listNode, listCount); 926 NextTrie_SetKind(next, TrieKind); 927 } 928 } else { 929 // ** Make a new list node 930 next = (uintptr_t) makeCFBurstTrieListNode(key+1, keylen-1, weight, payload); 931 NextTrie_SetKind(next, ListKind); 932 code = NewTerm; 933 } 934 root->slots[*key] = next; 935 } 936 } else { 937 // ** Handle payload 938 if (!root->weight) code = NewTerm; 939 else code = ExistingTerm; 940 root->weight += weight; 941 root->payload = payload; 942 } 943 944 return code; 945 } 946 #if 0 947 #pragma mark - 948 #pragma mark Searching 949 #endif 950 static void findCFBurstTrieList(CFBurstTrieRef trie, TrieCursor *cursor, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool)) 951 { 952 ListNodeRef list = (ListNodeRef)NextTrie_GetPtr(cursor->next); 953 int len = cursor->prefixlen-cursor->keylen; 954 len = len <= 0 ? 0 : len; 955 while (list) { 956 int lencompare = list->length-len; 957 if (list->length >= len && 958 (len == 0 || memcmp(list->string, cursor->prefix+cursor->keylen, len) == 0)) { 959 memcpy(cursor->key+cursor->keylen, list->string, list->length); 960 cursor->key[cursor->keylen+list->length] = 0; 961 cursor->next = (NextTrie)list; 962 if (list->payload && callback(ctx, cursor->key, list->payload, lencompare==0)) return; 963 } 964 list = list->next; 965 } 966 } 967 968 static void findCFBurstTrieMappedPage(CFBurstTrieRef trie, MapCursor *cursor, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool)) 969 { 970 Page *page = (Page *)DiskNextTrie_GetPtr(trie->mapBase, cursor->next); 971 uint32_t end = page->length; 972 uint32_t cur = 0; 973 int len = cursor->prefixlen-cursor->keylen; 974 len = len <= 0 ? 0 : len; 975 if (trie->cflags & kCFBurstTriePrefixCompression) { 976 uint8_t pfx[CHARACTER_SET_SIZE]; 977 PageEntryPacked *lastEntry = 0; 978 while (cur < end) { 979 PageEntryPacked *entry = (PageEntryPacked *)&page->data[cur]; 980 int lencompare = (entry->strlen+entry->pfxLen)-len; 981 if (lastEntry && entry->pfxLen>lastEntry->pfxLen) memcpy(pfx+lastEntry->pfxLen, lastEntry->string, entry->pfxLen-lastEntry->pfxLen); 982 if (lencompare >= 0 && 983 (len == 0 || (__builtin_memcmp(pfx, cursor->prefix+cursor->keylen, entry->pfxLen) == 0 && 984 __builtin_memcmp(entry->string, cursor->prefix+cursor->keylen+entry->pfxLen, cursor->prefixlen-cursor->keylen-entry->pfxLen) == 0))) { 985 memcpy(cursor->key+cursor->keylen, pfx, entry->pfxLen); 986 memcpy(cursor->key+cursor->keylen+entry->pfxLen, entry->string, entry->strlen); 987 cursor->key[cursor->keylen+entry->pfxLen+entry->strlen] = 0; 988 if (entry->payload && callback(ctx, (const uint8_t *)cursor->key, entry->payload, lencompare==0)) return; 989 } 990 lastEntry = entry; 991 cur += sizeof(*entry) + entry->strlen; 992 } 993 } else { 994 while (cur < end) { 995 PageEntry *entry = (PageEntry *)&page->data[cur]; 996 int lencompare = entry->strlen-len; 997 if (lencompare >= 0 && __builtin_memcmp(entry->string, cursor->prefix+cursor->keylen, len) == 0) { 998 memcpy(cursor->key+cursor->keylen, entry->string, entry->strlen); 999 cursor->key[cursor->keylen+entry->strlen] = 0; 1000 if (entry->payload && callback(ctx, (const uint8_t *)cursor->key, entry->payload, lencompare==0)) return; 1001 } 1002 cur += sizeof(*entry) + entry->strlen; 1003 } 1004 } 1005 } 1006 1007 1008 static void findCFBurstTrieLevel(CFBurstTrieRef trie, TrieCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool)) 1009 { 1010 if (cursor->keylen < cursor->prefixlen) { 1011 cursor->next = ((TrieLevelRef)NextTrie_GetPtr(cursor->next))->slots[cursor->prefix[cursor->keylen]]; 1012 cursor->key[cursor->keylen++] = cursor->prefix[cursor->keylen]; 1013 1014 if (NextTrie_GetKind(cursor->next) == TrieKind) { 1015 findCFBurstTrieLevel(trie, cursor, exactmatch, ctx, callback); 1016 } else if (NextTrie_GetKind(cursor->next) == ListKind) { 1017 findCFBurstTrieList(trie, cursor, ctx, callback); 1018 } 1019 } else { 1020 TrieLevelRef root = (TrieLevelRef)NextTrie_GetPtr(cursor->next); 1021 if (root->payload && callback(ctx, cursor->key, root->payload, cursor->prefixlen==cursor->keylen)) return; 1022 if (cursor->keylen == cursor->prefixlen && exactmatch) return; 1023 traverseCFBurstTrieLevel(trie, root, cursor, exactmatch, ctx, callback); 1024 } 1025 } 1026 1027 static void findCFBurstTrieCompactMappedLevel(CFBurstTrieRef trie, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool)) 1028 { 1029 CompactMapTrieLevelRef root = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next); 1030 if (cursor->keylen < cursor->prefixlen) { 1031 uint8_t mykey = *(cursor->prefix+cursor->keylen); 1032 cursor->key[cursor->keylen++] = *(cursor->prefix+cursor->keylen); 1033 1034 uint8_t slot = mykey / 64; 1035 uint8_t bit = mykey % 64; 1036 uint32_t item = 0; 1037 uint64_t bword = root->bitmap[slot]; 1038 1039 if (bword & (1ull << bit)) { 1040 // ** Count all the set bits up to this bit 1041 for (int i=0; i < slot; i++) item += __builtin_popcountll(root->bitmap[i]); 1042 item += __builtin_popcountll(bword & ((1ull << bit)-1)); 1043 uint32_t offset = root->slots[item]; 1044 cursor->next = offset; 1045 1046 if (DiskNextTrie_GetKind(offset) == TrieKind) { 1047 findCFBurstTrieMappedLevel(trie, cursor, exactmatch, ctx, callback); 1048 } else if (DiskNextTrie_GetKind(offset) == CompactTrieKind) { 1049 findCFBurstTrieCompactMappedLevel(trie, cursor, exactmatch, ctx, callback); 1050 } else if (DiskNextTrie_GetKind(offset) == ListKind) { 1051 findCFBurstTrieMappedPage(trie, cursor, ctx, callback); 1052 } 1053 } 1054 } else { 1055 if(root->payload && callback(ctx, cursor->key, root->payload, cursor->prefixlen==cursor->keylen)) return; 1056 if (cursor->keylen == cursor->prefixlen && exactmatch) return; 1057 traverseCFBurstTrieCompactMappedLevel(trie, root, cursor, exactmatch, ctx, callback); 1058 } 1059 } 1060 1061 static void findCFBurstTrieMappedLevel(CFBurstTrieRef trie, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void*, const uint8_t*, uint32_t, bool)) 1062 { 1063 MapTrieLevelRef root = (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next); 1064 if (cursor->keylen < cursor->prefixlen) { 1065 uint8_t slot = *(cursor->prefix+cursor->keylen); 1066 cursor->next = root->slots[slot]; 1067 cursor->key[cursor->keylen++] = slot; 1068 1069 if (DiskNextTrie_GetKind(cursor->next) == TrieKind) { 1070 findCFBurstTrieMappedLevel(trie, cursor, exactmatch, ctx, callback); 1071 } else if (DiskNextTrie_GetKind(cursor->next) == CompactTrieKind) { 1072 findCFBurstTrieCompactMappedLevel(trie, cursor, exactmatch, ctx, callback); 1073 } else if (DiskNextTrie_GetKind(cursor->next) == ListKind) { 1074 findCFBurstTrieMappedPage(trie, cursor, ctx, callback); 1075 } 1076 } else { 1077 if (root->payload && callback(ctx, cursor->key, root->payload, cursor->prefixlen==cursor->keylen)) return; 1078 if (cursor->keylen == cursor->prefixlen && exactmatch) return; 1079 traverseCFBurstTrieMappedLevel(trie, root, cursor, exactmatch, ctx, callback); 1080 } 1081 } 1082 1083 static void traverseCFBurstTrieLevel(CFBurstTrieRef trie, TrieLevelRef root, TrieCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)) 1084 { 1085 cursor->key[cursor->keylen] = 0; 1086 uint32_t len = cursor->keylen; 1087 for (int i=0; i < CHARACTER_SET_SIZE; i++) { 1088 NextTrie next = root->slots[i]; 1089 cursor->keylen = len; 1090 cursor->key[cursor->keylen++] = i; 1091 1092 if (NextTrie_GetKind(next) == TrieKind) { 1093 TrieLevelRef level = (TrieLevelRef)NextTrie_GetPtr(next); 1094 if (level->payload && callback(ctx, cursor->key, level->payload, cursor->prefixlen==cursor->keylen)) return; 1095 if (cursor->keylen == cursor->prefixlen && exactmatch) return; 1096 traverseCFBurstTrieLevel(trie, level, cursor, exactmatch, ctx, callback); 1097 } else if (NextTrie_GetKind(next) == ListKind) { 1098 cursor->next = next; 1099 cursor->key[cursor->keylen] = 0; 1100 findCFBurstTrieList(trie, cursor, ctx, callback); 1101 } 1102 } 1103 } 1104 1105 static void traverseCFBurstTrieMappedLevel(CFBurstTrieRef trie, MapTrieLevelRef root, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)) 1106 { 1107 cursor->key[cursor->keylen] = 0; 1108 uint32_t len = cursor->keylen; 1109 1110 for (int i=0; i < CHARACTER_SET_SIZE; i++) { 1111 uint32_t offset = (uint32_t)root->slots[i]; 1112 cursor->keylen = len; 1113 cursor->key[cursor->keylen++] = i; 1114 1115 if (DiskNextTrie_GetKind(offset) == TrieKind) { 1116 MapTrieLevelRef level = (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, offset); 1117 if (level->payload && callback(ctx, cursor->key, level->payload, cursor->prefixlen==cursor->keylen)) return; 1118 if (cursor->keylen == cursor->prefixlen && exactmatch) return; 1119 traverseCFBurstTrieMappedLevel(trie, level, cursor, exactmatch, ctx, callback); 1120 } else if (DiskNextTrie_GetKind(offset) == CompactTrieKind) { 1121 CompactMapTrieLevelRef level = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, offset); 1122 if (level->payload && callback(ctx, cursor->key, level->payload, cursor->prefixlen==cursor->keylen)) return; 1123 if (cursor->keylen == cursor->prefixlen && exactmatch) return; 1124 traverseCFBurstTrieCompactMappedLevel(trie, level, cursor, exactmatch, ctx, callback); 1125 } else if (DiskNextTrie_GetKind(offset) == ListKind) { 1126 cursor->next = offset; 1127 cursor->key[cursor->keylen] = 0; 1128 findCFBurstTrieMappedPage(trie, cursor, ctx, callback); 1129 } 1130 } 1131 } 1132 1133 static void traverseCFBurstTrieCompactMappedLevel(CFBurstTrieRef trie, CompactMapTrieLevelRef root, MapCursor *cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)) 1134 { 1135 cursor->key[cursor->keylen] = 0; 1136 uint32_t len = cursor->keylen; 1137 for (uint32_t c=0; c < CHARACTER_SET_SIZE; c++) { 1138 //** This could be optimized to remember what the last slot/item was and not count bits over and over. 1139 uint8_t mykey = c; 1140 uint32_t slot = mykey / 64; 1141 uint32_t bit = mykey % 64; 1142 uint32_t item = 0; 1143 uint64_t bword = root->bitmap[slot]; 1144 cursor->keylen = len; 1145 1146 if (bword & (1ull << bit)) { 1147 // ** Count all the set bits up to this bit 1148 for (int i=0; i < slot; i++) item += __builtin_popcountll(root->bitmap[i]); 1149 item += __builtin_popcountll(bword & ((1ull << bit)-1)); 1150 uint32_t offset = root->slots[item]; 1151 cursor->key[cursor->keylen++] = mykey; 1152 1153 if(DiskNextTrie_GetKind(offset) == CompactTrieKind) { 1154 CompactMapTrieLevelRef level = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, offset); 1155 if (level->payload && callback(ctx, cursor->key, level->payload, cursor->prefixlen==cursor->keylen)) return; 1156 if (cursor->keylen == cursor->prefixlen && exactmatch) return; 1157 traverseCFBurstTrieCompactMappedLevel(trie, level, cursor, exactmatch, ctx, callback); 1158 } else if(DiskNextTrie_GetKind(offset) == TrieKind) { 1159 traverseCFBurstTrieMappedLevel(trie, (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, offset), cursor, exactmatch, ctx, callback); 1160 } else if (DiskNextTrie_GetKind(offset) == ListKind) { 1161 cursor->next = offset; 1162 cursor->key[cursor->keylen] = 0; 1163 findCFBurstTrieMappedPage(trie, cursor, ctx, callback); 1164 } 1165 } 1166 } 1167 } 1168 1169 static void traverseCFBurstTrieWithCursor(CFBurstTrieRef trie, const uint8_t *prefix, uint32_t prefixLen, void **cursor, bool exactmatch, void *ctx, bool (*callback)(void *, const uint8_t *, uint32_t, bool)) { 1170 if (trie->mapBase) { 1171 if (trie->cflags & kCFBurstTriePrefixCompression) { 1172 fprintf(stderr, "Please use CFBurstTrieCursorRef API for file based trie.\n"); 1173 return; 1174 } else { 1175 TrieHeader *header = (TrieHeader *)trie->mapBase; 1176 MapCursor csr; 1177 csr.next = header->rootOffset; 1178 csr.prefix = prefix; 1179 csr.prefixlen = prefixLen; 1180 csr.key[0] = 0; 1181 csr.keylen = 0; 1182 findCFBurstTrieMappedLevel(trie, &csr, exactmatch, ctx, callback); 1183 } 1184 } else { 1185 TrieCursor csr; 1186 csr.next = ((uintptr_t)&trie->root)|TrieKind; 1187 csr.prefix = prefix; 1188 csr.prefixlen = prefixLen; 1189 csr.key[0] = 0; 1190 csr.keylen = 0; 1191 findCFBurstTrieLevel(trie, &csr, exactmatch, ctx, callback); 1192 } 1193 } 1194 1195 CF_INLINE uint32_t getPackedPageEntrySize(PageEntryPacked *entry) 1196 { 1197 return sizeof(PageEntryPacked) + entry->strlen; 1198 } 1199 1200 CF_INLINE uint32_t getPageEntrySize(PageEntry *entry) 1201 { 1202 return sizeof(PageEntry) + entry->strlen; 1203 } 1204 1205 /* 1206 static void _printPageEntry(PageEntryPacked *entry) { 1207 printf("entry 0x%p:\n", entry); 1208 printf("pfxLen = %d, strLen = %d\n", entry->pfxLen, entry->strlen); 1209 if (entry->strlen > 0) { 1210 printf("string = "); 1211 for (int i = 0; i < entry->strlen; ++i) 1212 printf("%d ", entry->string[i]); 1213 printf("\n"); 1214 } 1215 printf("\n"); 1216 } 1217 */ 1218 static Boolean advanceCursorOnMappedPageForByte(Page *page, CompactMapCursor *cursor, UInt8 byte) { 1219 PageEntryPacked *entry; 1220 Boolean found = FALSE; 1221 uint32_t minPrefixLength = 0; 1222 1223 if (cursor->isOnPage) { 1224 entry = (PageEntryPacked *)&page->data[cursor->entryOffsetInPage]; 1225 //_printPageEntry(entry); 1226 BOOL shouldContinue = TRUE; 1227 1228 if (!(cursor->entryOffsetInPage == 0 && entry->strlen == 0)) { 1229 if (cursor->entryOffsetInPage == entry->strlen - 1) { 1230 minPrefixLength = entry->pfxLen + entry->strlen; 1231 cursor->entryOffsetInPage += getPackedPageEntrySize(entry); 1232 } else { 1233 cursor->offsetInEntry++; 1234 if (entry->string[cursor->offsetInEntry] == byte) 1235 found = TRUE; 1236 else if (entry->string[cursor->offsetInEntry] > byte) 1237 shouldContinue = FALSE; 1238 else { 1239 minPrefixLength = entry->pfxLen + cursor->offsetInEntry; 1240 cursor->entryOffsetInPage += getPackedPageEntrySize(entry); 1241 } 1242 } 1243 } 1244 1245 if (found) { 1246 cursor->isOnPage = TRUE; 1247 return TRUE; 1248 } else if (!shouldContinue) 1249 return FALSE; 1250 } else { 1251 cursor->entryOffsetInPage = 0; 1252 } 1253 1254 uint32_t pageSize = page->length - sizeof(Page); 1255 while (cursor->entryOffsetInPage < pageSize) { 1256 entry = (PageEntryPacked *)&page->data[cursor->entryOffsetInPage]; 1257 //_printPageEntry(entry); 1258 if (minPrefixLength > entry->pfxLen) 1259 break; 1260 else if (minPrefixLength < entry->pfxLen) 1261 cursor->entryOffsetInPage += getPackedPageEntrySize(entry); 1262 else { 1263 if (entry->strlen == 0) 1264 cursor->entryOffsetInPage += getPackedPageEntrySize(entry); 1265 else { 1266 if (entry->string[0] > byte) 1267 // Entries are sorted alphabetically 1268 break; 1269 else if (entry->string[0] < byte) 1270 cursor->entryOffsetInPage += getPackedPageEntrySize(entry); 1271 else { 1272 cursor->offsetInEntry = 0; 1273 found = TRUE; 1274 break; 1275 } 1276 } 1277 } 1278 } 1279 1280 if (found) 1281 cursor->isOnPage = TRUE; 1282 1283 return found; 1284 } 1285 1286 static Boolean advanceCursorMappedPageWithPerfixCompression(Page *page, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length) 1287 { 1288 if (length == 0) { 1289 PageEntryPacked *entry = (PageEntryPacked*)&page->data[0]; 1290 if (!cursor->isOnPage) { 1291 cursor->entryOffsetInPage = 0; 1292 cursor->offsetInEntry = 0; 1293 cursor->isOnPage = entry->pfxLen == 0 && entry->strlen == 0; 1294 } 1295 getMapCursorPayloadFromPackedPageEntry(entry, cursor, &cursor->payload); 1296 return TRUE; 1297 } 1298 1299 for (CFIndex i = 0; i < length; ++i) { 1300 if (!advanceCursorOnMappedPageForByte(page, cursor, bytes[i])) 1301 return FALSE; 1302 } 1303 PageEntryPacked *entry = (PageEntryPacked*)&page->data[cursor->entryOffsetInPage]; 1304 getMapCursorPayloadFromPackedPageEntry(entry, cursor, &cursor->payload); 1305 return TRUE; 1306 } 1307 1308 static Boolean advanceCursorMappedPageSortedByKey(Page *page, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length) 1309 { 1310 if (length == 0) { 1311 PageEntry*entry = (PageEntry*)&page->data[0]; 1312 if (!cursor->isOnPage) { 1313 cursor->entryOffsetInPage = 0; 1314 cursor->offsetInEntry = 0; 1315 cursor->isOnPage = entry->strlen == 0; 1316 } 1317 getMapCursorPayloadFromPageEntry(entry, cursor, &cursor->payload); 1318 return TRUE; 1319 } 1320 1321 PageEntry *entry; 1322 uint32_t pageSize = page->length - sizeof(Page); 1323 const UInt8 * prefix = NULL; 1324 uint32_t prefixLength = 0; 1325 1326 if (cursor->isOnPage) { 1327 entry = (PageEntry*)&page->data[cursor->entryOffsetInPage]; 1328 prefix = entry->string; 1329 prefixLength = cursor->offsetInEntry + 1; 1330 } 1331 1332 while (cursor->entryOffsetInPage < pageSize) { 1333 PageEntry *entry = (PageEntry*)&page->data[cursor->entryOffsetInPage]; 1334 if (entry->strlen == 0) { 1335 cursor->entryOffsetInPage += getPageEntrySize(entry); 1336 continue; 1337 } 1338 1339 if (entry->strlen <= prefixLength) 1340 return FALSE; 1341 else { 1342 int cmpResult = 0; 1343 if (prefixLength > 0) 1344 cmpResult = __builtin_memcmp(entry->string, prefix, prefixLength); 1345 if (cmpResult > 0) 1346 return FALSE; 1347 else if (cmpResult == 0) { 1348 if (entry->strlen < prefixLength + length) { 1349 int cmpResult2 = __builtin_memcmp(entry->string + prefixLength, bytes, entry->strlen - prefixLength); 1350 if (cmpResult2 > 0) 1351 return FALSE; 1352 else 1353 cursor->entryOffsetInPage += getPageEntrySize(entry); 1354 } else { 1355 int cmpResult2 = __builtin_memcmp(entry->string + prefixLength, bytes, length); 1356 if (cmpResult2 > 0) 1357 return FALSE; 1358 else if (cmpResult2 == 0) { 1359 cursor->isOnPage = TRUE; 1360 cursor->offsetInEntry = prefixLength + length - 1; 1361 getMapCursorPayloadFromPageEntry(entry, cursor, &cursor->payload); 1362 return TRUE; 1363 } else 1364 cursor->entryOffsetInPage += getPageEntrySize(entry); 1365 } 1366 } else 1367 cursor->entryOffsetInPage += getPageEntrySize(entry); 1368 } 1369 } 1370 return FALSE; 1371 } 1372 1373 static Boolean advanceCursorMappedPage(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length) 1374 { 1375 if (!bytes || length < 0) 1376 return FALSE; 1377 1378 Page *page = (Page *)DiskNextTrie_GetPtr(trie->mapBase, cursor->next); 1379 uint32_t pageSize = page->length - sizeof(Page); 1380 if (pageSize == 0) 1381 return FALSE; 1382 1383 if (trie->cflags & kCFBurstTrieSortByKey) 1384 return advanceCursorMappedPageSortedByKey(page, cursor, bytes, length); 1385 else if (trie->cflags & kCFBurstTriePrefixCompression) 1386 return advanceCursorMappedPageWithPerfixCompression(page, cursor, bytes, length); 1387 else 1388 return FALSE; 1389 } 1390 1391 static Boolean advanceCursorCompactMappedLevel(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length) 1392 { 1393 if (!bytes || length < 0) 1394 return FALSE; 1395 1396 CompactMapTrieLevelRef root = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next); 1397 if (length == 0) { 1398 cursor->payload = root->payload; 1399 return TRUE; 1400 } 1401 1402 uint8_t slot = bytes[0] / 64; 1403 uint8_t bit = bytes[0] % 64; 1404 uint32_t item = 0; 1405 uint64_t bword = root->bitmap[slot]; 1406 if (bword & (1ull << bit)) { 1407 // Count all the set bits up to this bit 1408 for (int i = 0; i < slot; ++i) 1409 item += __builtin_popcountll(root->bitmap[i]); 1410 item += __builtin_popcountll(bword & ((1ull << bit)-1)); 1411 cursor->next = root->slots[item]; 1412 return advanceMapCursor(trie, cursor, bytes + 1, length - 1); 1413 } else { 1414 return FALSE; 1415 } 1416 } 1417 1418 static Boolean advanceCursorMappedLevel(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length) 1419 { 1420 if (!bytes || length < 0) 1421 return FALSE; 1422 1423 MapTrieLevelRef root = (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next); 1424 if (length == 0) { 1425 cursor->payload = root->payload; 1426 return TRUE; 1427 } 1428 1429 cursor->next = root->slots[bytes[0]]; 1430 return advanceMapCursor(trie, cursor, bytes + 1, length - 1); 1431 } 1432 1433 static Boolean advanceMapCursor(CFBurstTrieRef trie, CompactMapCursor *cursor, const UInt8* bytes, CFIndex length) 1434 { 1435 bool result = FALSE; 1436 switch (DiskNextTrie_GetKind(cursor->next)) { 1437 case TrieKind: 1438 result = advanceCursorMappedLevel(trie, cursor, bytes, length); 1439 break; 1440 case CompactTrieKind: 1441 result = advanceCursorCompactMappedLevel(trie, cursor, bytes, length); 1442 break; 1443 case ListKind: 1444 result = advanceCursorMappedPage(trie, cursor, bytes, length); 1445 break; 1446 case Nothing: { 1447 TrieHeader *header = (TrieHeader*)trie->mapBase; 1448 if (cursor->next == header->rootOffset) 1449 result = advanceCursorMappedLevel(trie, cursor, bytes, length); 1450 } 1451 } 1452 1453 return result; 1454 } 1455 1456 static void traverseFromMapCursorMappedLevel(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback) 1457 { 1458 MapTrieLevelRef root = (MapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next); 1459 if (root->payload) { 1460 callback(ctx, bytes, length, root->payload, stop); 1461 if (*stop) 1462 return; 1463 } 1464 1465 if (length >= capacity) 1466 return; 1467 1468 for (int i = 0; i < CHARACTER_SET_SIZE; ++i) {i = 1469 bytes[length] = i; 1470 cursor->next = (uint32_t)root->slots[i];; 1471 cursor->isOnPage = FALSE; 1472 cursor->entryOffsetInPage = 0; 1473 cursor->offsetInEntry = 0; 1474 cursor->payload = 0; 1475 traverseFromMapCursor(trie, cursor, bytes, capacity - 1, length + 1, stop, ctx, callback); 1476 if (*stop) 1477 return; 1478 } 1479 } 1480 1481 static void traverseFromMapCursorCompactMappedLevel(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback) 1482 { 1483 CompactMapTrieLevelRef root = (CompactMapTrieLevelRef)DiskNextTrie_GetPtr(trie->mapBase, cursor->next); 1484 if (root->payload) { 1485 callback(ctx, bytes, length, root->payload, stop); 1486 if (*stop) 1487 return; 1488 } 1489 1490 if (length >= capacity) 1491 return; 1492 for (int c = 0; c < CHARACTER_SET_SIZE; ++c) { 1493 bytes[length] = c; 1494 //This could be optimized to remember what the last slot/item was and not count bits over and over. 1495 uint8_t slot = c / 64; 1496 uint8_t bit = c % 64; 1497 uint32_t item = 0; 1498 uint64_t bword = root->bitmap[slot]; 1499 if (bword & (1ull << bit)) { 1500 // Count all the set bits up to this bit 1501 for (int i = 0; i < slot; ++i) 1502 item += __builtin_popcountll(root->bitmap[i]); 1503 item += __builtin_popcountll(bword & ((1ull << bit)-1)); 1504 cursor->next = root->slots[item]; 1505 cursor->isOnPage = FALSE; 1506 cursor->entryOffsetInPage = 0; 1507 cursor->offsetInEntry = 0; 1508 cursor->payload = 0; 1509 traverseFromMapCursor(trie, cursor, bytes, capacity - 1, length + 1, stop, ctx, callback); 1510 if (*stop) 1511 return; 1512 } 1513 } 1514 } 1515 1516 static void traverseFromMapCursorMappedPageWithPrefixCompression(Page *page, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback) 1517 { 1518 uint32_t pageSize = page->length - sizeof(Page); 1519 uint32_t offset = cursor->entryOffsetInPage; 1520 uint32_t minPrefixLength = 0; 1521 if (cursor->isOnPage) { 1522 PageEntryPacked *entry = (PageEntryPacked*)&page->data[offset]; 1523 int32_t remainingLength = entry->strlen - cursor->offsetInEntry - 1; 1524 if (remainingLength >= 0 && remainingLength <= capacity) { 1525 memcpy(bytes + length, entry->string + cursor->offsetInEntry + 1, remainingLength); 1526 callback(ctx, bytes, length + remainingLength, entry->payload, stop); 1527 if (*stop) 1528 return; 1529 } 1530 minPrefixLength = entry->pfxLen + cursor->offsetInEntry; 1531 offset += getPackedPageEntrySize(entry); 1532 } 1533 PageEntryPacked *previousEntry = NULL; 1534 while (offset < pageSize) { 1535 PageEntryPacked *entry = (PageEntryPacked*)&page->data[offset]; 1536 if (minPrefixLength > entry->pfxLen) 1537 break; 1538 else if (entry->payload && entry->strlen <= capacity) { 1539 if (previousEntry) 1540 length -= previousEntry->strlen + previousEntry->pfxLen - entry->pfxLen; 1541 memcpy(bytes + length, entry->string, entry->strlen); 1542 callback(ctx, bytes, length + entry->strlen, entry->payload, stop); 1543 length += entry->strlen; 1544 if (*stop) 1545 return; 1546 } 1547 previousEntry = entry; 1548 offset += getPackedPageEntrySize(entry); 1549 } 1550 } 1551 1552 static void traverseFromMapCursorMappedPageSortedByKey(Page *page, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback) 1553 { 1554 uint32_t pageSize = page->length - sizeof(Page); 1555 uint32_t offset = cursor->entryOffsetInPage; 1556 uint32_t prefixLength = 0; 1557 const UInt8 *prefix = NULL; 1558 if (cursor->isOnPage) { 1559 PageEntry *entry = (PageEntry*)&page->data[offset]; 1560 int32_t remainingLength = entry->strlen - cursor->offsetInEntry - 1; 1561 if (remainingLength >= 0 && remainingLength <= capacity) { 1562 memcpy(bytes + length, entry->string + cursor->offsetInEntry, remainingLength); 1563 callback(ctx, bytes, length + remainingLength, entry->payload, stop); 1564 if (*stop) 1565 return; 1566 } 1567 prefixLength = cursor->offsetInEntry + 1; 1568 prefix = entry->string; 1569 offset += getPageEntrySize(entry); 1570 } 1571 1572 while (offset < pageSize) { 1573 PageEntry *entry = (PageEntry*)&page->data[offset]; 1574 if (entry->strlen < prefixLength) 1575 break; 1576 else { 1577 int cmpResult = __builtin_memcmp(entry->string, prefix, prefixLength); 1578 if (cmpResult > 0) 1579 break; 1580 else if (entry->payload && entry->strlen <= capacity) { 1581 if (entry->strlen > 0) 1582 memcpy(bytes + length, entry->string + prefixLength, entry->strlen - prefixLength); 1583 callback(ctx, bytes, length + entry->strlen - prefixLength, entry->payload, stop); 1584 if (*stop) 1585 return; 1586 } 1587 offset += getPageEntrySize(entry); 1588 } 1589 } 1590 } 1591 1592 static void traverseFromMapCursorMappedPage(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback) 1593 { 1594 Page *page = (Page *)DiskNextTrie_GetPtr(trie->mapBase, cursor->next); 1595 if (trie->cflags & kCFBurstTrieSortByKey) 1596 traverseFromMapCursorMappedPageSortedByKey(page, cursor, bytes, capacity, length, stop, ctx, callback); 1597 else if (trie->cflags & kCFBurstTriePrefixCompression) 1598 traverseFromMapCursorMappedPageWithPrefixCompression(page, cursor, bytes, capacity, length, stop, ctx, callback); 1599 } 1600 1601 void traverseFromMapCursor(CFBurstTrieRef trie, CompactMapCursor *cursor, UInt8* bytes, uint32_t capacity, uint32_t length, Boolean *stop, void *ctx, CFBurstTrieTraversalCallback callback) 1602 { 1603 switch (DiskNextTrie_GetKind(cursor->next)) { 1604 case TrieKind: 1605 traverseFromMapCursorMappedLevel(trie, cursor, bytes, capacity, length, stop, ctx, callback); 1606 break; 1607 case CompactTrieKind: 1608 traverseFromMapCursorCompactMappedLevel(trie, cursor, bytes, capacity, length, stop, ctx, callback); 1609 break; 1610 case ListKind: 1611 traverseFromMapCursorMappedPage(trie, cursor, bytes, capacity, length, stop, ctx, callback); 1612 break; 1613 case Nothing: { 1614 TrieHeader *header = (TrieHeader*)trie->mapBase; 1615 if (cursor->next == header->rootOffset) { 1616 traverseFromMapCursorMappedLevel(trie, cursor, bytes, capacity, length, stop, ctx, callback); 1617 } 1618 break; 1619 } 1620 } 1621 } 1622 1623 void copyMapCursor(const CompactMapCursor *source, CompactMapCursor* destination) 1624 { 1625 destination->next = source->next; 1626 destination->entryOffsetInPage = source->entryOffsetInPage; 1627 destination->offsetInEntry = source->offsetInEntry; 1628 destination->isOnPage = source->isOnPage; 1629 destination->payload = source->payload; 1630 } 1631 1632 Boolean areMapCursorsEqual(const CompactMapCursor *lhs, const CompactMapCursor *rhs) 1633 { 1634 return lhs->entryOffsetInPage == rhs->entryOffsetInPage && lhs->isOnPage == rhs->isOnPage && lhs->next == rhs->next && lhs->offsetInEntry == rhs->offsetInEntry; 1635 } 1636 1637 static Boolean getMapCursorPayloadFromPackedPageEntry(PageEntryPacked *entry, const CompactMapCursor *cursor, uint32_t *payload) 1638 { 1639 if (payload) 1640 *payload = 0; 1641 if (cursor->entryOffsetInPage == 0 && cursor->offsetInEntry == 0 && entry->strlen == 0) { 1642 if (payload) 1643 *payload = entry->payload; 1644 return TRUE; 1645 } else if (cursor->offsetInEntry != entry->strlen - 1) 1646 return FALSE; 1647 else { 1648 if (payload) 1649 *payload = entry->payload; 1650 return TRUE; 1651 } 1652 } 1653 1654 static Boolean getMapCursorPayloadFromPageEntry(PageEntry *entry, const CompactMapCursor *cursor, uint32_t *payload) 1655 { 1656 if (payload) 1657 *payload = 0; 1658 if (cursor->entryOffsetInPage == 0 && cursor->offsetInEntry == 0 && entry->strlen == 0) { 1659 if (payload) 1660 *payload = entry->payload; 1661 return TRUE; 1662 } else if (cursor->offsetInEntry != entry->strlen - 1) 1663 return FALSE; 1664 else { 1665 if (payload) 1666 *payload = entry->payload; 1667 return TRUE; 1668 } 1669 } 1670 1671 Boolean getMapCursorPayload(CFBurstTrieRef trie, const CompactMapCursor *cursor, uint32_t *payload) 1672 { 1673 if (!cursor) 1674 return FALSE; 1675 if (cursor->payload) { 1676 if (payload) 1677 *payload = cursor->payload; 1678 return TRUE; 1679 } 1680 return FALSE; 1681 } 1682 1683 // Legacy 1684 1685 static Boolean burstTrieMappedFind(DiskTrieLevelRef trie, char *map, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix) { 1686 Boolean success = false; 1687 if (length) { 1688 uint32_t offset = CFSwapInt32LittleToHost((uint32_t)trie->slots[*key]); 1689 if(DiskNextTrie_GetKind(offset) == TrieKind) { 1690 return burstTrieMappedFind((DiskTrieLevelRef)DiskNextTrie_GetPtr(map,offset), map, key+1, length-1, payload, prefix); 1691 } else if(DiskNextTrie_GetKind(offset) == CompactTrieKind) { 1692 return burstTrieCompactTrieMappedFind((CompactDiskTrieLevelRef)DiskNextTrie_GetPtr(map, offset), map, key+1, length-1, payload, prefix); 1693 } else { 1694 if(DiskNextTrie_GetKind(offset) == ListKind) { 1695 return burstTrieMappedPageFind((StringPage *)DiskNextTrie_GetPtr(map, offset), key+1, length-1, payload, prefix); 1696 } else { 1697 return success; 1698 } 1699 } 1700 } else { 1701 if (trie->weight) { 1702 SetPayload(payload, CFSwapInt32LittleToHost(trie->payload)); 1703 success = true; 1704 } 1705 } 1706 return success; 1707 } 1708 1709 static Boolean burstTrieCompactTrieMappedFind(CompactDiskTrieLevelRef trie, char *map, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix) { 1710 Boolean success = false; 1711 if (length) { 1712 uint32_t mykey = *key; 1713 uint32_t slot = mykey / 64; 1714 uint32_t bit = mykey % 64; 1715 uint32_t item = 0; 1716 uint64_t bword = CFSwapInt64LittleToHost(trie->bitmap[slot]); 1717 if (bword & (1ull << bit)) { 1718 /* Count all the set bits up to this bit */ 1719 for (int i=0; i < slot; i++) { 1720 item += __builtin_popcountll(trie->bitmap[i]); 1721 } 1722 item += __builtin_popcountll(bword & ((1ull << bit)-1)); 1723 uint32_t offset = CFSwapInt32LittleToHost((uint32_t)trie->slots[item]); 1724 if(DiskNextTrie_GetKind(offset) == TrieKind) { 1725 return burstTrieMappedFind((DiskTrieLevelRef)DiskNextTrie_GetPtr(map, offset), map, key+1, length-1, payload, prefix); 1726 } else if(DiskNextTrie_GetKind(offset) == CompactTrieKind) { 1727 return burstTrieCompactTrieMappedFind((CompactDiskTrieLevelRef)DiskNextTrie_GetPtr(map, offset), map, key+1, length-1, payload, prefix); 1728 } 1729 else { 1730 if(DiskNextTrie_GetKind(offset) == ListKind) { 1731 return burstTrieMappedPageFind((StringPage *)DiskNextTrie_GetPtr(map, offset), key+1, length-1, payload, prefix); 1732 } else { 1733 return success; 1734 } 1735 } 1736 } 1737 } else { 1738 if (trie->weight) { 1739 SetPayload(payload, CFSwapInt32LittleToHost(trie->payload)); 1740 success = true; 1741 } 1742 } 1743 return success; 1744 } 1745 1746 static Boolean burstTrieMappedPageFind(StringPage *page, const UInt8 *key, uint32_t length, uint32_t *payload, bool prefix) { 1747 Boolean success = false; 1748 uint32_t end = CFSwapInt32LittleToHost(page->length); 1749 uint32_t cur = 0; 1750 if (prefix) { 1751 uint8_t pfx[256]; 1752 while (cur < end) { 1753 StringPageEntryPacked *entry = (StringPageEntryPacked *)&page->data[cur]; 1754 uint16_t strlen = entry->pfxLen+CFSwapInt16LittleToHost(entry->strlen); 1755 if (strlen == length && __builtin_memcmp(pfx, key, entry->pfxLen) == 0 && __builtin_memcmp(entry->string, key+entry->pfxLen, length-entry->pfxLen) == 0) { 1756 SetPayload(payload, CFSwapInt32LittleToHost(entry->payload)); 1757 success = true; 1758 return success; 1759 } else { 1760 memcpy(pfx+entry->pfxLen, entry->string, MIN(255-entry->pfxLen, length-entry->pfxLen)); 1761 cur += sizeof(*entry) + strlen - entry->pfxLen; 1762 } 1763 } 1764 } else { 1765 while (cur < end) { 1766 StringPageEntry *entry = (StringPageEntry *)&page->data[cur]; 1767 uint16_t strlen = CFSwapInt16LittleToHost(entry->strlen); 1768 if (strlen == length && __builtin_memcmp(entry->string, key, length) == 0) { 1769 SetPayload(payload, CFSwapInt32LittleToHost(entry->payload)); 1770 success = true; 1771 return success; 1772 } else { 1773 cur += sizeof(*entry) + strlen; 1774 } 1775 } 1776 1777 } 1778 return success; 1779 } 1780 1781 1782 #if 0 1783 #pragma mark - 1784 #pragma mark Serialization 1785 #endif 1786 1787 static bool serializeCFBurstTrieLevels(CFBurstTrieRef trie, TrieLevelRef root, uint32_t *offset, off_t start_offset, bool dispose, bool isroot, int fd) 1788 { 1789 bool dense = true; 1790 int count = 0; 1791 1792 for (int i=0; i < CHARACTER_SET_SIZE; i++) if (root->slots[i]) count++; 1793 1794 uint32_t this_offset = *offset; 1795 1796 if ((trie->cflags & kCFBurstTrieBitmapCompression) && count < MAX_BITMAP_SIZE && !isroot) { 1797 size_t size = sizeof(CompactMapTrieLevel) + sizeof(uint32_t) * count; 1798 int offsetSlot = 0; 1799 1800 CompactMapTrieLevel *maptrie = (CompactMapTrieLevel *)alloca(size); 1801 bzero(maptrie, size); 1802 *offset += size; 1803 1804 for (int i=0; i < CHARACTER_SET_SIZE; i++) { 1805 NextTrie next = root->slots[i]; 1806 if (next) { 1807 uint32_t slot = i / 64; 1808 uint32_t bit = i % 64; 1809 maptrie->bitmap[slot] |= 1ull<<bit; 1810 if (NextTrie_GetKind(next) == TrieKind) { 1811 TrieLevelRef nextLevel = (TrieLevelRef)NextTrie_GetPtr(next); 1812 uint32_t childOffset = *offset; 1813 if (serializeCFBurstTrieLevels(trie, nextLevel, offset, start_offset, true, false, fd)) { 1814 maptrie->slots[offsetSlot] = (TrieKind|childOffset); 1815 } else { 1816 maptrie->slots[offsetSlot] = (CompactTrieKind|childOffset); 1817 } 1818 } else { 1819 maptrie->slots[offsetSlot] = next; 1820 } 1821 offsetSlot++; 1822 } 1823 } 1824 maptrie->payload = root->payload; 1825 1826 int bitcount = 0; 1827 for (int i=0; i < 4; i++) bitcount += __builtin_popcountll(maptrie->bitmap[i]); 1828 assert(bitcount == count); 1829 1830 pwrite(fd, maptrie, size, this_offset+start_offset); 1831 dense = false; 1832 } else { 1833 MapTrieLevel maptrie; 1834 *offset += sizeof(maptrie); 1835 1836 for (int i=0; i < CHARACTER_SET_SIZE; i++) { 1837 NextTrie next = root->slots[i]; 1838 if (NextTrie_GetKind(next) == TrieKind) { 1839 TrieLevelRef nextLevel = (TrieLevelRef)NextTrie_GetPtr(next); 1840 uint32_t childOffset = *offset; 1841 if (serializeCFBurstTrieLevels(trie, nextLevel, offset, start_offset, true, false, fd)) { 1842 maptrie.slots[i] = (TrieKind|childOffset); 1843 } else { 1844 maptrie.slots[i] = (CompactTrieKind|childOffset); 1845 } 1846 } else { 1847 maptrie.slots[i] = next; 1848 } 1849 } 1850 maptrie.payload = root->payload; 1851 pwrite(fd, &maptrie, sizeof(maptrie), this_offset+start_offset); 1852 } 1853 1854 if (dispose) free(root); 1855 return dense; 1856 } 1857 1858 static void serializeCFBurstTrieList(CFBurstTrieRef trie, ListNodeRef listNode, int fd) 1859 { 1860 uint32_t listCount; 1861 size_t size = trie->containerSize; 1862 1863 // ** Temp list of nodes to sort 1864 ListNodeRef *nodes = (ListNodeRef *)malloc(sizeof(ListNodeRef) * size); 1865 for (listCount = 0; listNode; listCount++) { 1866 if (listCount >= size) { 1867 size *= 2; 1868 nodes = (ListNodeRef *)realloc(nodes, sizeof(ListNodeRef) * size); 1869 } 1870 nodes[listCount] = listNode; 1871 listNode = listNode->next; 1872 } 1873 1874 char _buffer[MAX_BUFFER_SIZE]; 1875 size_t bufferSize = (sizeof(Page) + size * (sizeof(PageEntryPacked) + MAX_STRING_SIZE)); 1876 char *buffer = bufferSize < MAX_BUFFER_SIZE ? _buffer : (char *) malloc(bufferSize); 1877 1878 Page *page = (Page *)buffer; 1879 uint32_t current = 0; 1880 1881 if (trie->cflags & kCFBurstTriePrefixCompression) { 1882 qsort(nodes, listCount, sizeof(ListNodeRef), nodeStringCompare); 1883 1884 ListNodeRef last = 0; 1885 for (int i=0; i < listCount; i++) { 1886 listNode = nodes[i]; 1887 uint8_t pfxLen = 0; 1888 if (last) { 1889 for ( ; 1890 pfxLen < CHARACTER_SET_SIZE-1 && 1891 pfxLen < listNode->length && 1892 pfxLen < last->length && 1893 listNode->string[pfxLen] == last->string[pfxLen]; 1894 pfxLen++); 1895 } 1896 1897 PageEntryPacked *entry = (PageEntryPacked *)(&page->data[current]); 1898 entry->strlen = listNode->length - pfxLen; 1899 entry->payload = listNode->payload; 1900 entry->pfxLen = pfxLen; 1901 memcpy(entry->string, listNode->string+pfxLen, listNode->length-pfxLen); 1902 current += listNode->length - pfxLen + sizeof(PageEntryPacked); 1903 last = listNode; 1904 } 1905 } else { 1906 if (trie->cflags & kCFBurstTrieSortByKey) 1907 qsort(nodes, listCount, sizeof(ListNodeRef), nodeStringCompare); 1908 else 1909 qsort(nodes, listCount, sizeof(ListNodeRef), nodeWeightCompare); 1910 1911 for (int i=0; i < listCount; i++) { 1912 listNode = nodes[i]; 1913 PageEntry *entry = (PageEntry *)(&page->data[current]); 1914 entry->strlen = listNode->length; 1915 entry->payload = listNode->payload; 1916 memcpy(entry->string, listNode->string, listNode->length); 1917 current += listNode->length + sizeof(PageEntry); 1918 } 1919 } 1920 1921 size_t len = (sizeof(Page) + current + 3) & ~3; 1922 page->length = current; 1923 write(fd, page, len); 1924 1925 free(nodes); 1926 if (buffer != _buffer) free(buffer); 1927 } 1928 1929 static void serializeCFBurstTrieLists(CFBurstTrieRef trie, TrieLevelRef root, off_t start_offset, int fd) 1930 { 1931 for (int i=0; i < CHARACTER_SET_SIZE; i++) { 1932 NextTrie next = root->slots[i]; 1933 uint32_t offset; 1934 if (NextTrie_GetKind(next) == TrieKind) { 1935 TrieLevelRef nextLevel = (TrieLevelRef)NextTrie_GetPtr(next); 1936 serializeCFBurstTrieLists(trie, nextLevel, start_offset, fd); 1937 } else { 1938 if (NextTrie_GetKind(next) == ListKind) { 1939 ListNodeRef listNode = (ListNodeRef)NextTrie_GetPtr(next); 1940 offset = lseek(fd, 0, SEEK_CUR) - start_offset; 1941 serializeCFBurstTrieList(trie, listNode, fd); 1942 finalizeCFBurstTrieList(listNode); 1943 //assert((offset & 3)==0); 1944 root->slots[i] = (offset|ListKind); 1945 } 1946 } 1947 } 1948 } 1949 1950 static size_t serializeCFBurstTrie(CFBurstTrieRef trie, size_t start_offset, int fd) 1951 { 1952 TrieHeader header; 1953 header.signature = 0x0ddba11; 1954 header.rootOffset = 0; 1955 header.count = trie->count; 1956 header.size = 0; 1957 header.flags = trie->cflags; 1958 header.reserved[0] = 0; 1959 1960 uint32_t offset; 1961 lseek(fd, start_offset, SEEK_SET); 1962 1963 size_t header_size = sizeof(header); 1964 write(fd, &header, header_size); 1965 1966 serializeCFBurstTrieLists(trie, &trie->root, start_offset, fd); 1967 1968 offset = lseek(fd, 0, SEEK_CUR) - start_offset; 1969 size_t off = offsetof(TrieHeader, rootOffset); 1970 size_t offsize = sizeof(offset); 1971 pwrite(fd, &offset, offsize, off+start_offset); 1972 1973 serializeCFBurstTrieLevels(trie, &trie->root, &offset, start_offset, false, true, fd); 1974 1975 size_t off2 = offsetof(TrieHeader, size); 1976 offsize = sizeof(offset); 1977 pwrite(fd, &offset, offsize, off2+start_offset); 1978 1979 offset = lseek(fd, 0, SEEK_END); 1980 return (size_t)(offset-start_offset); 1981 } 1982 1983 #if 0 1984 #pragma mark - 1985 #pragma mark Release 1986 #endif 1987 1988 static void destroyCFBurstTrie(CFBurstTrieRef trie) { 1989 if (trie->mapBase) { 1990 #if DEPLOYMENT_TARGET_WINDOWS 1991 UnmapViewOfFile(trie->mapBase); 1992 CloseHandle(trie->mapHandle); 1993 CloseHandle(trie->mappedFileHandle); 1994 #else 1995 munmap(trie->mapBase, trie->mapSize); 1996 #endif 1997 } else { 1998 finalizeCFBurstTrie(&trie->root); 1999 } 2000 free(trie); 2001 return; 2002 } 2003 2004 static void finalizeCFBurstTrie(TrieLevelRef trie) { 2005 for (int i=0; i < CHARACTER_SET_SIZE; i++) { 2006 if (NextTrie_GetKind(trie->slots[i]) == TrieKind) { 2007 finalizeCFBurstTrie((TrieLevelRef)NextTrie_GetPtr(trie->slots[i])); 2008 free((void *)NextTrie_GetPtr(trie->slots[i])); 2009 } else if (NextTrie_GetKind(trie->slots[i]) == ListKind) { 2010 finalizeCFBurstTrieList((ListNodeRef)NextTrie_GetPtr(trie->slots[i])); 2011 } 2012 } 2013 } 2014 2015 static void finalizeCFBurstTrieList(ListNodeRef node) { 2016 do { 2017 ListNodeRef next = node->next; 2018 free(node); 2019 node = next; 2020 } while(node); 2021 } 2022 2023 #if 0 2024 #pragma mark - 2025 #pragma mark Helpers 2026 #endif 2027 2028 static int nodeWeightCompare(const void *a, const void *b) { 2029 ListNodeRef nodeA = *(ListNodeRef *)a; 2030 ListNodeRef nodeB = *(ListNodeRef *)b; 2031 return (nodeB->weight - nodeA->weight); 2032 } 2033 2034 static int nodeStringCompare(const void *a, const void *b) { 2035 ListNodeRef nodeA = *(ListNodeRef *)a; 2036 ListNodeRef nodeB = *(ListNodeRef *)b; 2037 int result = memcmp((char *)nodeA->string, (char *)nodeB->string, MIN(nodeA->length, nodeB->length)); 2038 if (result == 0) result = nodeA->length-nodeB->length; 2039 return result; 2040 } 2041 2042 static bool containsKey(void *context, const uint8_t *key, uint32_t payload, bool exact) 2043 { 2044 uint32_t *ctx = (uint32_t *)context; 2045 if (exact) *ctx = payload; 2046 return exact; 2047 } 2048 2049 static CFIndex burstTrieConvertCharactersToUTF8(UniChar *chars, CFIndex numChars, UInt8 *buffer) { 2050 uint32_t i, j; 2051 for (i = j = 0; i < numChars; i++) { 2052 UniChar c = chars[i]; 2053 if (CFStringIsSurrogateHighCharacter(c) && i + 1 < numChars && CFStringIsSurrogateLowCharacter(chars[i + 1])) { 2054 UTF32Char lc = CFStringGetLongCharacterForSurrogatePair(c, chars[i + 1]); 2055 buffer[j++] = 0xf0 + (lc >> 18); 2056 buffer[j++] = 0x80 + ((lc & 0x3ffff) >> 12); 2057 buffer[j++] = 0x80 + ((lc & 0xfff) >> 6); 2058 buffer[j++] = 0x80 + (lc & 0x3f); 2059 i++; 2060 } else { 2061 if (c < 0x80) { 2062 buffer[j++] = c; 2063 } else if (c < 0x800) { 2064 buffer[j++] = 0xc0 + (c >> 6); 2065 buffer[j++] = 0x80 + (c & 0x3f); 2066 } else { 2067 buffer[j++] = 0xe0 + (c >> 12); 2068 buffer[j++] = 0x80 + ((c & 0xfff) >> 6); 2069 buffer[j++] = 0x80 + (c & 0x3f); 2070 } 2071 } 2072 } 2073 buffer[j] = 0; 2074 return j; 2075 } 2076