umsgpack.py
1 # u-msgpack-python v2.7.1 - v at sergeev.io 2 # https://github.com/vsergeev/u-msgpack-python 3 # 4 # u-msgpack-python is a lightweight MessagePack serializer and deserializer 5 # module, compatible with both Python 2 and 3, as well CPython and PyPy 6 # implementations of Python. u-msgpack-python is fully compliant with the 7 # latest MessagePack specification.com/msgpack/msgpack/blob/master/spec.md). In 8 # particular, it supports the new binary, UTF-8 string, and application ext 9 # types. 10 # 11 # MIT License 12 # 13 # Copyright (c) 2013-2020 vsergeev / Ivan (Vanya) A. Sergeev 14 # 15 # Permission is hereby granted, free of charge, to any person obtaining a copy 16 # of this software and associated documentation files (the "Software"), to deal 17 # in the Software without restriction, including without limitation the rights 18 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 19 # copies of the Software, and to permit persons to whom the Software is 20 # furnished to do so, subject to the following conditions: 21 # 22 # The above copyright notice and this permission notice shall be included in 23 # all copies or substantial portions of the Software. 24 # 25 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 26 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 27 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 28 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 29 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 30 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 31 # THE SOFTWARE. 32 # 33 """ 34 u-msgpack-python v2.7.1 - v at sergeev.io 35 https://github.com/vsergeev/u-msgpack-python 36 37 u-msgpack-python is a lightweight MessagePack serializer and deserializer 38 module, compatible with both Python 2 and 3, as well CPython and PyPy 39 implementations of Python. u-msgpack-python is fully compliant with the 40 latest MessagePack specification.com/msgpack/msgpack/blob/master/spec.md). In 41 particular, it supports the new binary, UTF-8 string, and application ext 42 types. 43 44 License: MIT 45 """ 46 import struct 47 import collections 48 import datetime 49 import sys 50 import io 51 52 if sys.version_info[0:2] >= (3, 3): 53 from collections.abc import Hashable 54 else: 55 from collections import Hashable 56 57 __version__ = "2.7.1" 58 "Module version string" 59 60 version = (2, 7, 1) 61 "Module version tuple" 62 63 64 ############################################################################## 65 # Ext Class 66 ############################################################################## 67 68 # Extension type for application-defined types and data 69 class Ext(object): 70 """ 71 The Ext class facilitates creating a serializable extension object to store 72 an application-defined type and data byte array. 73 """ 74 75 def __init__(self, type, data): 76 """ 77 Construct a new Ext object. 78 79 Args: 80 type: application-defined type integer 81 data: application-defined data byte array 82 83 TypeError: 84 Type is not an integer. 85 ValueError: 86 Type is out of range of -128 to 127. 87 TypeError:: 88 Data is not type 'bytes' (Python 3) or not type 'str' (Python 2). 89 90 Example: 91 >>> foo = umsgpack.Ext(5, b"\x01\x02\x03") 92 >>> umsgpack.packb({u"special stuff": foo, u"awesome": True}) 93 '\x82\xa7awesome\xc3\xadspecial stuff\xc7\x03\x05\x01\x02\x03' 94 >>> bar = umsgpack.unpackb(_) 95 >>> print(bar["special stuff"]) 96 Ext Object (Type: 5, Data: 01 02 03) 97 >>> 98 """ 99 # Check type is type int and in range 100 if not isinstance(type, int): 101 raise TypeError("ext type is not type integer") 102 elif not (-2**7 <= type <= 2**7 - 1): 103 raise ValueError("ext type value {:d} is out of range (-128 to 127)".format(type)) 104 # Check data is type bytes or str 105 elif sys.version_info[0] == 3 and not isinstance(data, bytes): 106 raise TypeError("ext data is not type \'bytes\'") 107 elif sys.version_info[0] == 2 and not isinstance(data, str): 108 raise TypeError("ext data is not type \'str\'") 109 110 self.type = type 111 self.data = data 112 113 def __eq__(self, other): 114 """ 115 Compare this Ext object with another for equality. 116 """ 117 return isinstance(other, self.__class__) \ 118 and self.type == other.type and self.data == other.data 119 120 def __ne__(self, other): 121 """ 122 Compare this Ext object with another for inequality. 123 """ 124 return not self.__eq__(other) 125 126 def __str__(self): 127 """ 128 String representation of this Ext object. 129 """ 130 s = "Ext Object (Type: {:d}, Data: ".format(self.type) 131 s += " ".join(["0x{:02}".format(ord(self.data[i:i + 1])) 132 for i in xrange(min(len(self.data), 8))]) 133 if len(self.data) > 8: 134 s += " ..." 135 s += ")" 136 return s 137 138 def __hash__(self): 139 """ 140 Provide a hash of this Ext object. 141 """ 142 return hash((self.type, self.data)) 143 144 145 class InvalidString(bytes): 146 """Subclass of bytes to hold invalid UTF-8 strings.""" 147 148 149 ############################################################################## 150 # Ext Serializable Decorator 151 ############################################################################## 152 153 _ext_class_to_type = {} 154 _ext_type_to_class = {} 155 156 157 def ext_serializable(ext_type): 158 """ 159 Return a decorator to register a class for automatic packing and unpacking 160 with the specified Ext type code. The application class should implement a 161 `packb()` method that returns serialized bytes, and an `unpackb()` class 162 method or static method that accepts serialized bytes and returns an 163 instance of the application class. 164 165 Args: 166 ext_type: application-defined Ext type code 167 168 Raises: 169 TypeError: 170 Ext type is not an integer. 171 ValueError: 172 Ext type is out of range of -128 to 127. 173 ValueError: 174 Ext type or class already registered. 175 """ 176 def wrapper(cls): 177 if not isinstance(ext_type, int): 178 raise TypeError("Ext type is not type integer") 179 elif not (-2**7 <= ext_type <= 2**7 - 1): 180 raise ValueError("Ext type value {:d} is out of range of -128 to 127".format(ext_type)) 181 elif ext_type in _ext_type_to_class: 182 raise ValueError("Ext type {:d} already registered with class {:s}".format(ext_type, repr(_ext_type_to_class[ext_type]))) 183 elif cls in _ext_class_to_type: 184 raise ValueError("Class {:s} already registered with Ext type {:d}".format(repr(cls), ext_type)) 185 186 _ext_type_to_class[ext_type] = cls 187 _ext_class_to_type[cls] = ext_type 188 189 return cls 190 191 return wrapper 192 193 194 ############################################################################## 195 # Exceptions 196 ############################################################################## 197 198 199 # Base Exception classes 200 class PackException(Exception): 201 "Base class for exceptions encountered during packing." 202 203 204 class UnpackException(Exception): 205 "Base class for exceptions encountered during unpacking." 206 207 208 # Packing error 209 class UnsupportedTypeException(PackException): 210 "Object type not supported for packing." 211 212 213 # Unpacking error 214 class InsufficientDataException(UnpackException): 215 "Insufficient data to unpack the serialized object." 216 217 218 class InvalidStringException(UnpackException): 219 "Invalid UTF-8 string encountered during unpacking." 220 221 222 class UnsupportedTimestampException(UnpackException): 223 "Unsupported timestamp format encountered during unpacking." 224 225 226 class ReservedCodeException(UnpackException): 227 "Reserved code encountered during unpacking." 228 229 230 class UnhashableKeyException(UnpackException): 231 """ 232 Unhashable key encountered during map unpacking. 233 The serialized map cannot be deserialized into a Python dictionary. 234 """ 235 236 237 class DuplicateKeyException(UnpackException): 238 "Duplicate key encountered during map unpacking." 239 240 241 # Backwards compatibility 242 KeyNotPrimitiveException = UnhashableKeyException 243 KeyDuplicateException = DuplicateKeyException 244 245 ############################################################################# 246 # Exported Functions and Glob 247 ############################################################################# 248 249 # Exported functions and variables, set up in __init() 250 pack = None 251 packb = None 252 unpack = None 253 unpackb = None 254 dump = None 255 dumps = None 256 load = None 257 loads = None 258 259 compatibility = False 260 """ 261 Compatibility mode boolean. 262 263 When compatibility mode is enabled, u-msgpack-python will serialize both 264 unicode strings and bytes into the old "raw" msgpack type, and deserialize the 265 "raw" msgpack type into bytes. This provides backwards compatibility with the 266 old MessagePack specification. 267 268 Example: 269 >>> umsgpack.compatibility = True 270 >>> 271 >>> umsgpack.packb([u"some string", b"some bytes"]) 272 b'\x92\xabsome string\xaasome bytes' 273 >>> umsgpack.unpackb(_) 274 [b'some string', b'some bytes'] 275 >>> 276 """ 277 278 ############################################################################## 279 # Packing 280 ############################################################################## 281 282 # You may notice struct.pack("B", obj) instead of the simpler chr(obj) in the 283 # code below. This is to allow for seamless Python 2 and 3 compatibility, as 284 # chr(obj) has a str return type instead of bytes in Python 3, and 285 # struct.pack(...) has the right return type in both versions. 286 287 288 def _pack_integer(obj, fp, options): 289 if obj < 0: 290 if obj >= -32: 291 fp.write(struct.pack("b", obj)) 292 elif obj >= -2**(8 - 1): 293 fp.write(b"\xd0" + struct.pack("b", obj)) 294 elif obj >= -2**(16 - 1): 295 fp.write(b"\xd1" + struct.pack(">h", obj)) 296 elif obj >= -2**(32 - 1): 297 fp.write(b"\xd2" + struct.pack(">i", obj)) 298 elif obj >= -2**(64 - 1): 299 fp.write(b"\xd3" + struct.pack(">q", obj)) 300 else: 301 raise UnsupportedTypeException("huge signed int") 302 else: 303 if obj < 128: 304 fp.write(struct.pack("B", obj)) 305 elif obj < 2**8: 306 fp.write(b"\xcc" + struct.pack("B", obj)) 307 elif obj < 2**16: 308 fp.write(b"\xcd" + struct.pack(">H", obj)) 309 elif obj < 2**32: 310 fp.write(b"\xce" + struct.pack(">I", obj)) 311 elif obj < 2**64: 312 fp.write(b"\xcf" + struct.pack(">Q", obj)) 313 else: 314 raise UnsupportedTypeException("huge unsigned int") 315 316 317 def _pack_nil(obj, fp, options): 318 fp.write(b"\xc0") 319 320 321 def _pack_boolean(obj, fp, options): 322 fp.write(b"\xc3" if obj else b"\xc2") 323 324 325 def _pack_float(obj, fp, options): 326 float_precision = options.get('force_float_precision', _float_precision) 327 328 if float_precision == "double": 329 fp.write(b"\xcb" + struct.pack(">d", obj)) 330 elif float_precision == "single": 331 fp.write(b"\xca" + struct.pack(">f", obj)) 332 else: 333 raise ValueError("invalid float precision") 334 335 336 def _pack_string(obj, fp, options): 337 obj = obj.encode('utf-8') 338 obj_len = len(obj) 339 if obj_len < 32: 340 fp.write(struct.pack("B", 0xa0 | obj_len) + obj) 341 elif obj_len < 2**8: 342 fp.write(b"\xd9" + struct.pack("B", obj_len) + obj) 343 elif obj_len < 2**16: 344 fp.write(b"\xda" + struct.pack(">H", obj_len) + obj) 345 elif obj_len < 2**32: 346 fp.write(b"\xdb" + struct.pack(">I", obj_len) + obj) 347 else: 348 raise UnsupportedTypeException("huge string") 349 350 351 def _pack_binary(obj, fp, options): 352 obj_len = len(obj) 353 if obj_len < 2**8: 354 fp.write(b"\xc4" + struct.pack("B", obj_len) + obj) 355 elif obj_len < 2**16: 356 fp.write(b"\xc5" + struct.pack(">H", obj_len) + obj) 357 elif obj_len < 2**32: 358 fp.write(b"\xc6" + struct.pack(">I", obj_len) + obj) 359 else: 360 raise UnsupportedTypeException("huge binary string") 361 362 363 def _pack_oldspec_raw(obj, fp, options): 364 obj_len = len(obj) 365 if obj_len < 32: 366 fp.write(struct.pack("B", 0xa0 | obj_len) + obj) 367 elif obj_len < 2**16: 368 fp.write(b"\xda" + struct.pack(">H", obj_len) + obj) 369 elif obj_len < 2**32: 370 fp.write(b"\xdb" + struct.pack(">I", obj_len) + obj) 371 else: 372 raise UnsupportedTypeException("huge raw string") 373 374 375 def _pack_ext(obj, fp, options): 376 obj_len = len(obj.data) 377 if obj_len == 1: 378 fp.write(b"\xd4" + struct.pack("B", obj.type & 0xff) + obj.data) 379 elif obj_len == 2: 380 fp.write(b"\xd5" + struct.pack("B", obj.type & 0xff) + obj.data) 381 elif obj_len == 4: 382 fp.write(b"\xd6" + struct.pack("B", obj.type & 0xff) + obj.data) 383 elif obj_len == 8: 384 fp.write(b"\xd7" + struct.pack("B", obj.type & 0xff) + obj.data) 385 elif obj_len == 16: 386 fp.write(b"\xd8" + struct.pack("B", obj.type & 0xff) + obj.data) 387 elif obj_len < 2**8: 388 fp.write(b"\xc7" + struct.pack("BB", obj_len, obj.type & 0xff) + obj.data) 389 elif obj_len < 2**16: 390 fp.write(b"\xc8" + struct.pack(">HB", obj_len, obj.type & 0xff) + obj.data) 391 elif obj_len < 2**32: 392 fp.write(b"\xc9" + struct.pack(">IB", obj_len, obj.type & 0xff) + obj.data) 393 else: 394 raise UnsupportedTypeException("huge ext data") 395 396 397 def _pack_ext_timestamp(obj, fp, options): 398 if not obj.tzinfo: 399 # Object is naive datetime, convert to aware date time, 400 # assuming UTC timezone 401 delta = obj.replace(tzinfo=_utc_tzinfo) - _epoch 402 else: 403 # Object is aware datetime 404 delta = obj - _epoch 405 406 seconds = delta.seconds + delta.days * 86400 407 microseconds = delta.microseconds 408 409 if microseconds == 0 and 0 <= seconds <= 2**32 - 1: 410 # 32-bit timestamp 411 fp.write(b"\xd6\xff" + struct.pack(">I", seconds)) 412 elif 0 <= seconds <= 2**34 - 1: 413 # 64-bit timestamp 414 value = ((microseconds * 1000) << 34) | seconds 415 fp.write(b"\xd7\xff" + struct.pack(">Q", value)) 416 elif -2**63 <= abs(seconds) <= 2**63 - 1: 417 # 96-bit timestamp 418 fp.write(b"\xc7\x0c\xff" + struct.pack(">Iq", microseconds * 1000, seconds)) 419 else: 420 raise UnsupportedTypeException("huge timestamp") 421 422 423 def _pack_array(obj, fp, options): 424 obj_len = len(obj) 425 if obj_len < 16: 426 fp.write(struct.pack("B", 0x90 | obj_len)) 427 elif obj_len < 2**16: 428 fp.write(b"\xdc" + struct.pack(">H", obj_len)) 429 elif obj_len < 2**32: 430 fp.write(b"\xdd" + struct.pack(">I", obj_len)) 431 else: 432 raise UnsupportedTypeException("huge array") 433 434 for e in obj: 435 pack(e, fp, **options) 436 437 438 def _pack_map(obj, fp, options): 439 obj_len = len(obj) 440 if obj_len < 16: 441 fp.write(struct.pack("B", 0x80 | obj_len)) 442 elif obj_len < 2**16: 443 fp.write(b"\xde" + struct.pack(">H", obj_len)) 444 elif obj_len < 2**32: 445 fp.write(b"\xdf" + struct.pack(">I", obj_len)) 446 else: 447 raise UnsupportedTypeException("huge array") 448 449 for k, v in obj.items(): 450 pack(k, fp, **options) 451 pack(v, fp, **options) 452 453 ######################################## 454 455 456 # Pack for Python 2, with 'unicode' type, 'str' type, and 'long' type 457 def _pack2(obj, fp, **options): 458 """ 459 Serialize a Python object into MessagePack bytes. 460 461 Args: 462 obj: a Python object 463 fp: a .write()-supporting file-like object 464 465 Kwargs: 466 ext_handlers (dict): dictionary of Ext handlers, mapping a custom type 467 to a callable that packs an instance of the type 468 into an Ext object 469 force_float_precision (str): "single" to force packing floats as 470 IEEE-754 single-precision floats, 471 "double" to force packing floats as 472 IEEE-754 double-precision floats. 473 474 Returns: 475 None. 476 477 Raises: 478 UnsupportedType(PackException): 479 Object type not supported for packing. 480 481 Example: 482 >>> f = open('test.bin', 'wb') 483 >>> umsgpack.pack({u"compact": True, u"schema": 0}, f) 484 >>> 485 """ 486 global compatibility 487 488 ext_handlers = options.get("ext_handlers") 489 490 if obj is None: 491 _pack_nil(obj, fp, options) 492 elif ext_handlers and obj.__class__ in ext_handlers: 493 _pack_ext(ext_handlers[obj.__class__](obj), fp, options) 494 elif obj.__class__ in _ext_class_to_type: 495 try: 496 _pack_ext(Ext(_ext_class_to_type[obj.__class__], obj.packb()), fp, options) 497 except AttributeError: 498 raise NotImplementedError("Ext serializable class {:s} is missing implementation of packb()".format(repr(obj.__class__))) 499 elif isinstance(obj, bool): 500 _pack_boolean(obj, fp, options) 501 elif isinstance(obj, (int, long)): 502 _pack_integer(obj, fp, options) 503 elif isinstance(obj, float): 504 _pack_float(obj, fp, options) 505 elif compatibility and isinstance(obj, unicode): 506 _pack_oldspec_raw(bytes(obj), fp, options) 507 elif compatibility and isinstance(obj, bytes): 508 _pack_oldspec_raw(obj, fp, options) 509 elif isinstance(obj, unicode): 510 _pack_string(obj, fp, options) 511 elif isinstance(obj, str): 512 _pack_binary(obj, fp, options) 513 elif isinstance(obj, (list, tuple)): 514 _pack_array(obj, fp, options) 515 elif isinstance(obj, dict): 516 _pack_map(obj, fp, options) 517 elif isinstance(obj, datetime.datetime): 518 _pack_ext_timestamp(obj, fp, options) 519 elif isinstance(obj, Ext): 520 _pack_ext(obj, fp, options) 521 elif ext_handlers: 522 # Linear search for superclass 523 t = next((t for t in ext_handlers.keys() if isinstance(obj, t)), None) 524 if t: 525 _pack_ext(ext_handlers[t](obj), fp, options) 526 else: 527 raise UnsupportedTypeException( 528 "unsupported type: {:s}".format(str(type(obj)))) 529 elif _ext_class_to_type: 530 # Linear search for superclass 531 t = next((t for t in _ext_class_to_type if isinstance(obj, t)), None) 532 if t: 533 try: 534 _pack_ext(Ext(_ext_class_to_type[t], obj.packb()), fp, options) 535 except AttributeError: 536 raise NotImplementedError("Ext serializable class {:s} is missing implementation of packb()".format(repr(t))) 537 else: 538 raise UnsupportedTypeException("unsupported type: {:s}".format(str(type(obj)))) 539 else: 540 raise UnsupportedTypeException("unsupported type: {:s}".format(str(type(obj)))) 541 542 543 # Pack for Python 3, with unicode 'str' type, 'bytes' type, and no 'long' type 544 def _pack3(obj, fp, **options): 545 """ 546 Serialize a Python object into MessagePack bytes. 547 548 Args: 549 obj: a Python object 550 fp: a .write()-supporting file-like object 551 552 Kwargs: 553 ext_handlers (dict): dictionary of Ext handlers, mapping a custom type 554 to a callable that packs an instance of the type 555 into an Ext object 556 force_float_precision (str): "single" to force packing floats as 557 IEEE-754 single-precision floats, 558 "double" to force packing floats as 559 IEEE-754 double-precision floats. 560 561 Returns: 562 None. 563 564 Raises: 565 UnsupportedType(PackException): 566 Object type not supported for packing. 567 568 Example: 569 >>> f = open('test.bin', 'wb') 570 >>> umsgpack.pack({u"compact": True, u"schema": 0}, f) 571 >>> 572 """ 573 global compatibility 574 575 ext_handlers = options.get("ext_handlers") 576 577 if obj is None: 578 _pack_nil(obj, fp, options) 579 elif ext_handlers and obj.__class__ in ext_handlers: 580 _pack_ext(ext_handlers[obj.__class__](obj), fp, options) 581 elif obj.__class__ in _ext_class_to_type: 582 try: 583 _pack_ext(Ext(_ext_class_to_type[obj.__class__], obj.packb()), fp, options) 584 except AttributeError: 585 raise NotImplementedError("Ext serializable class {:s} is missing implementation of packb()".format(repr(obj.__class__))) 586 elif isinstance(obj, bool): 587 _pack_boolean(obj, fp, options) 588 elif isinstance(obj, int): 589 _pack_integer(obj, fp, options) 590 elif isinstance(obj, float): 591 _pack_float(obj, fp, options) 592 elif compatibility and isinstance(obj, str): 593 _pack_oldspec_raw(obj.encode('utf-8'), fp, options) 594 elif compatibility and isinstance(obj, bytes): 595 _pack_oldspec_raw(obj, fp, options) 596 elif isinstance(obj, str): 597 _pack_string(obj, fp, options) 598 elif isinstance(obj, bytes): 599 _pack_binary(obj, fp, options) 600 elif isinstance(obj, (list, tuple)): 601 _pack_array(obj, fp, options) 602 elif isinstance(obj, dict): 603 _pack_map(obj, fp, options) 604 elif isinstance(obj, datetime.datetime): 605 _pack_ext_timestamp(obj, fp, options) 606 elif isinstance(obj, Ext): 607 _pack_ext(obj, fp, options) 608 elif ext_handlers: 609 # Linear search for superclass 610 t = next((t for t in ext_handlers.keys() if isinstance(obj, t)), None) 611 if t: 612 _pack_ext(ext_handlers[t](obj), fp, options) 613 else: 614 raise UnsupportedTypeException( 615 "unsupported type: {:s}".format(str(type(obj)))) 616 elif _ext_class_to_type: 617 # Linear search for superclass 618 t = next((t for t in _ext_class_to_type if isinstance(obj, t)), None) 619 if t: 620 try: 621 _pack_ext(Ext(_ext_class_to_type[t], obj.packb()), fp, options) 622 except AttributeError: 623 raise NotImplementedError("Ext serializable class {:s} is missing implementation of packb()".format(repr(t))) 624 else: 625 raise UnsupportedTypeException("unsupported type: {:s}".format(str(type(obj)))) 626 else: 627 raise UnsupportedTypeException( 628 "unsupported type: {:s}".format(str(type(obj)))) 629 630 631 def _packb2(obj, **options): 632 """ 633 Serialize a Python object into MessagePack bytes. 634 635 Args: 636 obj: a Python object 637 638 Kwargs: 639 ext_handlers (dict): dictionary of Ext handlers, mapping a custom type 640 to a callable that packs an instance of the type 641 into an Ext object 642 force_float_precision (str): "single" to force packing floats as 643 IEEE-754 single-precision floats, 644 "double" to force packing floats as 645 IEEE-754 double-precision floats. 646 647 Returns: 648 A 'str' containing serialized MessagePack bytes. 649 650 Raises: 651 UnsupportedType(PackException): 652 Object type not supported for packing. 653 654 Example: 655 >>> umsgpack.packb({u"compact": True, u"schema": 0}) 656 '\x82\xa7compact\xc3\xa6schema\x00' 657 >>> 658 """ 659 fp = io.BytesIO() 660 _pack2(obj, fp, **options) 661 return fp.getvalue() 662 663 664 def _packb3(obj, **options): 665 """ 666 Serialize a Python object into MessagePack bytes. 667 668 Args: 669 obj: a Python object 670 671 Kwargs: 672 ext_handlers (dict): dictionary of Ext handlers, mapping a custom type 673 to a callable that packs an instance of the type 674 into an Ext object 675 force_float_precision (str): "single" to force packing floats as 676 IEEE-754 single-precision floats, 677 "double" to force packing floats as 678 IEEE-754 double-precision floats. 679 680 Returns: 681 A 'bytes' containing serialized MessagePack bytes. 682 683 Raises: 684 UnsupportedType(PackException): 685 Object type not supported for packing. 686 687 Example: 688 >>> umsgpack.packb({u"compact": True, u"schema": 0}) 689 b'\x82\xa7compact\xc3\xa6schema\x00' 690 >>> 691 """ 692 fp = io.BytesIO() 693 _pack3(obj, fp, **options) 694 return fp.getvalue() 695 696 ############################################################################# 697 # Unpacking 698 ############################################################################# 699 700 701 def _read_except(fp, n): 702 if n == 0: 703 return b"" 704 705 data = fp.read(n) 706 if len(data) == 0: 707 raise InsufficientDataException() 708 709 while len(data) < n: 710 chunk = fp.read(n - len(data)) 711 if len(chunk) == 0: 712 raise InsufficientDataException() 713 714 data += chunk 715 716 return data 717 718 719 def _unpack_integer(code, fp, options): 720 if (ord(code) & 0xe0) == 0xe0: 721 return struct.unpack("b", code)[0] 722 elif code == b'\xd0': 723 return struct.unpack("b", _read_except(fp, 1))[0] 724 elif code == b'\xd1': 725 return struct.unpack(">h", _read_except(fp, 2))[0] 726 elif code == b'\xd2': 727 return struct.unpack(">i", _read_except(fp, 4))[0] 728 elif code == b'\xd3': 729 return struct.unpack(">q", _read_except(fp, 8))[0] 730 elif (ord(code) & 0x80) == 0x00: 731 return struct.unpack("B", code)[0] 732 elif code == b'\xcc': 733 return struct.unpack("B", _read_except(fp, 1))[0] 734 elif code == b'\xcd': 735 return struct.unpack(">H", _read_except(fp, 2))[0] 736 elif code == b'\xce': 737 return struct.unpack(">I", _read_except(fp, 4))[0] 738 elif code == b'\xcf': 739 return struct.unpack(">Q", _read_except(fp, 8))[0] 740 raise Exception("logic error, not int: 0x{:02x}".format(ord(code))) 741 742 743 def _unpack_reserved(code, fp, options): 744 if code == b'\xc1': 745 raise ReservedCodeException( 746 "encountered reserved code: 0x{:02x}".format(ord(code))) 747 raise Exception( 748 "logic error, not reserved code: 0x{:02x}".format(ord(code))) 749 750 751 def _unpack_nil(code, fp, options): 752 if code == b'\xc0': 753 return None 754 raise Exception("logic error, not nil: 0x{:02x}".format(ord(code))) 755 756 757 def _unpack_boolean(code, fp, options): 758 if code == b'\xc2': 759 return False 760 elif code == b'\xc3': 761 return True 762 raise Exception("logic error, not boolean: 0x{:02x}".format(ord(code))) 763 764 765 def _unpack_float(code, fp, options): 766 if code == b'\xca': 767 return struct.unpack(">f", _read_except(fp, 4))[0] 768 elif code == b'\xcb': 769 return struct.unpack(">d", _read_except(fp, 8))[0] 770 raise Exception("logic error, not float: 0x{:02x}".format(ord(code))) 771 772 773 def _unpack_string(code, fp, options): 774 if (ord(code) & 0xe0) == 0xa0: 775 length = ord(code) & ~0xe0 776 elif code == b'\xd9': 777 length = struct.unpack("B", _read_except(fp, 1))[0] 778 elif code == b'\xda': 779 length = struct.unpack(">H", _read_except(fp, 2))[0] 780 elif code == b'\xdb': 781 length = struct.unpack(">I", _read_except(fp, 4))[0] 782 else: 783 raise Exception("logic error, not string: 0x{:02x}".format(ord(code))) 784 785 # Always return raw bytes in compatibility mode 786 global compatibility 787 if compatibility: 788 return _read_except(fp, length) 789 790 data = _read_except(fp, length) 791 try: 792 return bytes.decode(data, 'utf-8') 793 except UnicodeDecodeError: 794 if options.get("allow_invalid_utf8"): 795 return InvalidString(data) 796 raise InvalidStringException("unpacked string is invalid utf-8") 797 798 799 def _unpack_binary(code, fp, options): 800 if code == b'\xc4': 801 length = struct.unpack("B", _read_except(fp, 1))[0] 802 elif code == b'\xc5': 803 length = struct.unpack(">H", _read_except(fp, 2))[0] 804 elif code == b'\xc6': 805 length = struct.unpack(">I", _read_except(fp, 4))[0] 806 else: 807 raise Exception("logic error, not binary: 0x{:02x}".format(ord(code))) 808 809 return _read_except(fp, length) 810 811 812 def _unpack_ext(code, fp, options): 813 if code == b'\xd4': 814 length = 1 815 elif code == b'\xd5': 816 length = 2 817 elif code == b'\xd6': 818 length = 4 819 elif code == b'\xd7': 820 length = 8 821 elif code == b'\xd8': 822 length = 16 823 elif code == b'\xc7': 824 length = struct.unpack("B", _read_except(fp, 1))[0] 825 elif code == b'\xc8': 826 length = struct.unpack(">H", _read_except(fp, 2))[0] 827 elif code == b'\xc9': 828 length = struct.unpack(">I", _read_except(fp, 4))[0] 829 else: 830 raise Exception("logic error, not ext: 0x{:02x}".format(ord(code))) 831 832 ext_type = struct.unpack("b", _read_except(fp, 1))[0] 833 ext_data = _read_except(fp, length) 834 835 # Unpack with ext handler, if we have one 836 ext_handlers = options.get("ext_handlers") 837 if ext_handlers and ext_type in ext_handlers: 838 return ext_handlers[ext_type](Ext(ext_type, ext_data)) 839 840 # Unpack with ext classes, if type is registered 841 if ext_type in _ext_type_to_class: 842 try: 843 return _ext_type_to_class[ext_type].unpackb(ext_data) 844 except AttributeError: 845 raise NotImplementedError("Ext serializable class {:s} is missing implementation of unpackb()".format(repr(_ext_type_to_class[ext_type]))) 846 847 # Timestamp extension 848 if ext_type == -1: 849 return _unpack_ext_timestamp(ext_data, options) 850 851 return Ext(ext_type, ext_data) 852 853 854 def _unpack_ext_timestamp(ext_data, options): 855 obj_len = len(ext_data) 856 if obj_len == 4: 857 # 32-bit timestamp 858 seconds = struct.unpack(">I", ext_data)[0] 859 microseconds = 0 860 elif obj_len == 8: 861 # 64-bit timestamp 862 value = struct.unpack(">Q", ext_data)[0] 863 seconds = value & 0x3ffffffff 864 microseconds = (value >> 34) // 1000 865 elif obj_len == 12: 866 # 96-bit timestamp 867 seconds = struct.unpack(">q", ext_data[4:12])[0] 868 microseconds = struct.unpack(">I", ext_data[0:4])[0] // 1000 869 else: 870 raise UnsupportedTimestampException( 871 "unsupported timestamp with data length {:d}".format(len(ext_data))) 872 873 return _epoch + datetime.timedelta(seconds=seconds, 874 microseconds=microseconds) 875 876 877 def _unpack_array(code, fp, options): 878 if (ord(code) & 0xf0) == 0x90: 879 length = (ord(code) & ~0xf0) 880 elif code == b'\xdc': 881 length = struct.unpack(">H", _read_except(fp, 2))[0] 882 elif code == b'\xdd': 883 length = struct.unpack(">I", _read_except(fp, 4))[0] 884 else: 885 raise Exception("logic error, not array: 0x{:02x}".format(ord(code))) 886 887 if options.get('use_tuple'): 888 return tuple((_unpack(fp, options) for i in xrange(length))) 889 890 return [_unpack(fp, options) for i in xrange(length)] 891 892 893 def _deep_list_to_tuple(obj): 894 if isinstance(obj, list): 895 return tuple([_deep_list_to_tuple(e) for e in obj]) 896 return obj 897 898 899 def _unpack_map(code, fp, options): 900 if (ord(code) & 0xf0) == 0x80: 901 length = (ord(code) & ~0xf0) 902 elif code == b'\xde': 903 length = struct.unpack(">H", _read_except(fp, 2))[0] 904 elif code == b'\xdf': 905 length = struct.unpack(">I", _read_except(fp, 4))[0] 906 else: 907 raise Exception("logic error, not map: 0x{:02x}".format(ord(code))) 908 909 d = {} if not options.get('use_ordered_dict') else collections.OrderedDict() 910 for _ in xrange(length): 911 # Unpack key 912 k = _unpack(fp, options) 913 914 if isinstance(k, list): 915 # Attempt to convert list into a hashable tuple 916 k = _deep_list_to_tuple(k) 917 elif not isinstance(k, Hashable): 918 raise UnhashableKeyException( 919 "encountered unhashable key: \"{:s}\" ({:s})".format(str(k), str(type(k)))) 920 elif k in d: 921 raise DuplicateKeyException( 922 "encountered duplicate key: \"{:s}\" ({:s})".format(str(k), str(type(k)))) 923 924 # Unpack value 925 v = _unpack(fp, options) 926 927 try: 928 d[k] = v 929 except TypeError: 930 raise UnhashableKeyException( 931 "encountered unhashable key: \"{:s}\"".format(str(k))) 932 return d 933 934 935 def _unpack(fp, options): 936 code = _read_except(fp, 1) 937 return _unpack_dispatch_table[code](code, fp, options) 938 939 ######################################## 940 941 942 def _unpack2(fp, **options): 943 """ 944 Deserialize MessagePack bytes into a Python object. 945 946 Args: 947 fp: a .read()-supporting file-like object 948 949 Kwargs: 950 ext_handlers (dict): dictionary of Ext handlers, mapping integer Ext 951 type to a callable that unpacks an instance of 952 Ext into an object 953 use_ordered_dict (bool): unpack maps into OrderedDict, instead of 954 unordered dict (default False) 955 use_tuple (bool): unpacks arrays into tuples, instead of lists (default 956 False) 957 allow_invalid_utf8 (bool): unpack invalid strings into instances of 958 InvalidString, for access to the bytes 959 (default False) 960 961 Returns: 962 A Python object. 963 964 Raises: 965 InsufficientDataException(UnpackException): 966 Insufficient data to unpack the serialized object. 967 InvalidStringException(UnpackException): 968 Invalid UTF-8 string encountered during unpacking. 969 UnsupportedTimestampException(UnpackException): 970 Unsupported timestamp format encountered during unpacking. 971 ReservedCodeException(UnpackException): 972 Reserved code encountered during unpacking. 973 UnhashableKeyException(UnpackException): 974 Unhashable key encountered during map unpacking. 975 The serialized map cannot be deserialized into a Python dictionary. 976 DuplicateKeyException(UnpackException): 977 Duplicate key encountered during map unpacking. 978 979 Example: 980 >>> f = open('test.bin', 'rb') 981 >>> umsgpack.unpackb(f) 982 {u'compact': True, u'schema': 0} 983 >>> 984 """ 985 return _unpack(fp, options) 986 987 988 def _unpack3(fp, **options): 989 """ 990 Deserialize MessagePack bytes into a Python object. 991 992 Args: 993 fp: a .read()-supporting file-like object 994 995 Kwargs: 996 ext_handlers (dict): dictionary of Ext handlers, mapping integer Ext 997 type to a callable that unpacks an instance of 998 Ext into an object 999 use_ordered_dict (bool): unpack maps into OrderedDict, instead of 1000 unordered dict (default False) 1001 use_tuple (bool): unpacks arrays into tuples, instead of lists (default 1002 False) 1003 allow_invalid_utf8 (bool): unpack invalid strings into instances of 1004 InvalidString, for access to the bytes 1005 (default False) 1006 1007 Returns: 1008 A Python object. 1009 1010 Raises: 1011 InsufficientDataException(UnpackException): 1012 Insufficient data to unpack the serialized object. 1013 InvalidStringException(UnpackException): 1014 Invalid UTF-8 string encountered during unpacking. 1015 UnsupportedTimestampException(UnpackException): 1016 Unsupported timestamp format encountered during unpacking. 1017 ReservedCodeException(UnpackException): 1018 Reserved code encountered during unpacking. 1019 UnhashableKeyException(UnpackException): 1020 Unhashable key encountered during map unpacking. 1021 The serialized map cannot be deserialized into a Python dictionary. 1022 DuplicateKeyException(UnpackException): 1023 Duplicate key encountered during map unpacking. 1024 1025 Example: 1026 >>> f = open('test.bin', 'rb') 1027 >>> umsgpack.unpackb(f) 1028 {'compact': True, 'schema': 0} 1029 >>> 1030 """ 1031 return _unpack(fp, options) 1032 1033 1034 # For Python 2, expects a str object 1035 def _unpackb2(s, **options): 1036 """ 1037 Deserialize MessagePack bytes into a Python object. 1038 1039 Args: 1040 s: a 'str' or 'bytearray' containing serialized MessagePack bytes 1041 1042 Kwargs: 1043 ext_handlers (dict): dictionary of Ext handlers, mapping integer Ext 1044 type to a callable that unpacks an instance of 1045 Ext into an object 1046 use_ordered_dict (bool): unpack maps into OrderedDict, instead of 1047 unordered dict (default False) 1048 use_tuple (bool): unpacks arrays into tuples, instead of lists (default 1049 False) 1050 allow_invalid_utf8 (bool): unpack invalid strings into instances of 1051 InvalidString, for access to the bytes 1052 (default False) 1053 1054 Returns: 1055 A Python object. 1056 1057 Raises: 1058 TypeError: 1059 Packed data type is neither 'str' nor 'bytearray'. 1060 InsufficientDataException(UnpackException): 1061 Insufficient data to unpack the serialized object. 1062 InvalidStringException(UnpackException): 1063 Invalid UTF-8 string encountered during unpacking. 1064 UnsupportedTimestampException(UnpackException): 1065 Unsupported timestamp format encountered during unpacking. 1066 ReservedCodeException(UnpackException): 1067 Reserved code encountered during unpacking. 1068 UnhashableKeyException(UnpackException): 1069 Unhashable key encountered during map unpacking. 1070 The serialized map cannot be deserialized into a Python dictionary. 1071 DuplicateKeyException(UnpackException): 1072 Duplicate key encountered during map unpacking. 1073 1074 Example: 1075 >>> umsgpack.unpackb(b'\x82\xa7compact\xc3\xa6schema\x00') 1076 {u'compact': True, u'schema': 0} 1077 >>> 1078 """ 1079 if not isinstance(s, (str, bytearray)): 1080 raise TypeError("packed data must be type 'str' or 'bytearray'") 1081 return _unpack(io.BytesIO(s), options) 1082 1083 1084 # For Python 3, expects a bytes object 1085 def _unpackb3(s, **options): 1086 """ 1087 Deserialize MessagePack bytes into a Python object. 1088 1089 Args: 1090 s: a 'bytes' or 'bytearray' containing serialized MessagePack bytes 1091 1092 Kwargs: 1093 ext_handlers (dict): dictionary of Ext handlers, mapping integer Ext 1094 type to a callable that unpacks an instance of 1095 Ext into an object 1096 use_ordered_dict (bool): unpack maps into OrderedDict, instead of 1097 unordered dict (default False) 1098 use_tuple (bool): unpacks arrays into tuples, instead of lists (default 1099 False) 1100 allow_invalid_utf8 (bool): unpack invalid strings into instances of 1101 InvalidString, for access to the bytes 1102 (default False) 1103 1104 Returns: 1105 A Python object. 1106 1107 Raises: 1108 TypeError: 1109 Packed data type is neither 'bytes' nor 'bytearray'. 1110 InsufficientDataException(UnpackException): 1111 Insufficient data to unpack the serialized object. 1112 InvalidStringException(UnpackException): 1113 Invalid UTF-8 string encountered during unpacking. 1114 UnsupportedTimestampException(UnpackException): 1115 Unsupported timestamp format encountered during unpacking. 1116 ReservedCodeException(UnpackException): 1117 Reserved code encountered during unpacking. 1118 UnhashableKeyException(UnpackException): 1119 Unhashable key encountered during map unpacking. 1120 The serialized map cannot be deserialized into a Python dictionary. 1121 DuplicateKeyException(UnpackException): 1122 Duplicate key encountered during map unpacking. 1123 1124 Example: 1125 >>> umsgpack.unpackb(b'\x82\xa7compact\xc3\xa6schema\x00') 1126 {'compact': True, 'schema': 0} 1127 >>> 1128 """ 1129 if not isinstance(s, (bytes, bytearray)): 1130 raise TypeError("packed data must be type 'bytes' or 'bytearray'") 1131 return _unpack(io.BytesIO(s), options) 1132 1133 ############################################################################# 1134 # Module Initialization 1135 ############################################################################# 1136 1137 1138 def __init(): 1139 global pack 1140 global packb 1141 global unpack 1142 global unpackb 1143 global dump 1144 global dumps 1145 global load 1146 global loads 1147 global compatibility 1148 global _epoch 1149 global _utc_tzinfo 1150 global _float_precision 1151 global _unpack_dispatch_table 1152 global xrange 1153 1154 # Compatibility mode for handling strings/bytes with the old specification 1155 compatibility = False 1156 1157 if sys.version_info[0] == 3: 1158 _utc_tzinfo = datetime.timezone.utc 1159 else: 1160 class UTC(datetime.tzinfo): 1161 ZERO = datetime.timedelta(0) 1162 1163 def utcoffset(self, dt): 1164 return UTC.ZERO 1165 1166 def tzname(self, dt): 1167 return "UTC" 1168 1169 def dst(self, dt): 1170 return UTC.ZERO 1171 1172 _utc_tzinfo = UTC() 1173 1174 # Calculate an aware epoch datetime 1175 _epoch = datetime.datetime(1970, 1, 1, tzinfo=_utc_tzinfo) 1176 1177 # Auto-detect system float precision 1178 if sys.float_info.mant_dig == 53: 1179 _float_precision = "double" 1180 else: 1181 _float_precision = "single" 1182 1183 # Map packb and unpackb to the appropriate version 1184 if sys.version_info[0] == 3: 1185 pack = _pack3 1186 packb = _packb3 1187 dump = _pack3 1188 dumps = _packb3 1189 unpack = _unpack3 1190 unpackb = _unpackb3 1191 load = _unpack3 1192 loads = _unpackb3 1193 xrange = range 1194 else: 1195 pack = _pack2 1196 packb = _packb2 1197 dump = _pack2 1198 dumps = _packb2 1199 unpack = _unpack2 1200 unpackb = _unpackb2 1201 load = _unpack2 1202 loads = _unpackb2 1203 1204 # Build a dispatch table for fast lookup of unpacking function 1205 1206 _unpack_dispatch_table = {} 1207 # Fix uint 1208 for code in range(0, 0x7f + 1): 1209 _unpack_dispatch_table[struct.pack("B", code)] = _unpack_integer 1210 # Fix map 1211 for code in range(0x80, 0x8f + 1): 1212 _unpack_dispatch_table[struct.pack("B", code)] = _unpack_map 1213 # Fix array 1214 for code in range(0x90, 0x9f + 1): 1215 _unpack_dispatch_table[struct.pack("B", code)] = _unpack_array 1216 # Fix str 1217 for code in range(0xa0, 0xbf + 1): 1218 _unpack_dispatch_table[struct.pack("B", code)] = _unpack_string 1219 # Nil 1220 _unpack_dispatch_table[b'\xc0'] = _unpack_nil 1221 # Reserved 1222 _unpack_dispatch_table[b'\xc1'] = _unpack_reserved 1223 # Boolean 1224 _unpack_dispatch_table[b'\xc2'] = _unpack_boolean 1225 _unpack_dispatch_table[b'\xc3'] = _unpack_boolean 1226 # Bin 1227 for code in range(0xc4, 0xc6 + 1): 1228 _unpack_dispatch_table[struct.pack("B", code)] = _unpack_binary 1229 # Ext 1230 for code in range(0xc7, 0xc9 + 1): 1231 _unpack_dispatch_table[struct.pack("B", code)] = _unpack_ext 1232 # Float 1233 _unpack_dispatch_table[b'\xca'] = _unpack_float 1234 _unpack_dispatch_table[b'\xcb'] = _unpack_float 1235 # Uint 1236 for code in range(0xcc, 0xcf + 1): 1237 _unpack_dispatch_table[struct.pack("B", code)] = _unpack_integer 1238 # Int 1239 for code in range(0xd0, 0xd3 + 1): 1240 _unpack_dispatch_table[struct.pack("B", code)] = _unpack_integer 1241 # Fixext 1242 for code in range(0xd4, 0xd8 + 1): 1243 _unpack_dispatch_table[struct.pack("B", code)] = _unpack_ext 1244 # String 1245 for code in range(0xd9, 0xdb + 1): 1246 _unpack_dispatch_table[struct.pack("B", code)] = _unpack_string 1247 # Array 1248 _unpack_dispatch_table[b'\xdc'] = _unpack_array 1249 _unpack_dispatch_table[b'\xdd'] = _unpack_array 1250 # Map 1251 _unpack_dispatch_table[b'\xde'] = _unpack_map 1252 _unpack_dispatch_table[b'\xdf'] = _unpack_map 1253 # Negative fixint 1254 for code in range(0xe0, 0xff + 1): 1255 _unpack_dispatch_table[struct.pack("B", code)] = _unpack_integer 1256 1257 1258 __init()