/ src / python / txtai / serialize / pickle.py
pickle.py
 1  """
 2  Pickle module
 3  """
 4  
 5  import os
 6  import logging
 7  import pickle
 8  import warnings
 9  
10  from .base import Serialize
11  
12  # Logging configuration
13  logger = logging.getLogger(__name__)
14  
15  
class Pickle(Serialize):
    """
    Pickle serialization.

    Every load and save method first calls allow(), which raises a ValueError unless pickle serialization
    has been enabled through the allowpickle constructor argument or the `ALLOW_PICKLE` environment variable.
    """

    def __init__(self, allowpickle=False):
        """
        Creates a new instance for Pickle serialization.

        This class ensures the allowpickle parameter is True or the `ALLOW_PICKLE` environment variable is set
        to `True` or `1`. All methods will raise errors if this isn't the case.

        Pickle serialization is OK for local data but it isn't recommended when sharing data externally.

        Args:
            allowpickle: default pickle allow mode, only True with methods that generate local temporary data
        """

        # Parent constructor
        super().__init__()

        # Default allow pickle mode
        self.allowpickle = allowpickle

        # Current pickle protocol
        self.version = 4

    def load(self, path):
        """
        Loads pickled data from path, if allowed.

        Args:
            path: path to load, also reported in the error message when pickle is disabled

        Returns:
            result of the parent class load method

        Raises:
            ValueError: if pickle serialization is not allowed
        """

        # allow() raises when disabled, the None branch only applies if a subclass returns a falsy value
        return super().load(path) if self.allow(path) else None

    def save(self, data, path):
        """
        Saves data as a pickle to path, if allowed.

        Args:
            data: data to save
            path: output path, also reported in the error message when pickle is disabled

        Raises:
            ValueError: if pickle serialization is not allowed
        """

        # Pass path so a rejected save names the target file instead of the generic `stream` label
        if self.allow(path):
            super().save(data, path)

    def loadstream(self, stream):
        """
        Loads pickled data from a stream, if allowed.

        Args:
            stream: readable binary stream passed to pickle.load

        Returns:
            unpickled data

        Raises:
            ValueError: if pickle serialization is not allowed
        """

        # NOTE: pickle.load can execute arbitrary code, only use with trusted and/or local data
        return pickle.load(stream) if self.allow() else None

    def savestream(self, data, stream):
        """
        Saves data as a pickle to a stream, if allowed.

        Args:
            data: data to save
            stream: writable binary stream passed to pickle.dump

        Raises:
            ValueError: if pickle serialization is not allowed
        """

        if self.allow():
            pickle.dump(data, stream, protocol=self.version)

    def loadbytes(self, data):
        """
        Loads pickled data from bytes, if allowed.

        Args:
            data: pickled bytes passed to pickle.loads

        Returns:
            unpickled data

        Raises:
            ValueError: if pickle serialization is not allowed
        """

        # NOTE: pickle.loads can execute arbitrary code, only use with trusted and/or local data
        return pickle.loads(data) if self.allow() else None

    def savebytes(self, data):
        """
        Saves data as pickled bytes, if allowed.

        Args:
            data: data to save

        Returns:
            pickled bytes

        Raises:
            ValueError: if pickle serialization is not allowed
        """

        return pickle.dumps(data, protocol=self.version) if self.allow() else None

    def allow(self, path=None):
        """
        Checks if loading and saving pickled data is allowed. Raises an error if it's not allowed.

        Pickle is allowed when this instance was created with allowpickle=True or when the `ALLOW_PICKLE`
        environment variable is exactly `True` or `1`. When it's only allowed through the environment variable,
        a RuntimeWarning is issued on each call.

        Args:
            path: optional path to add to generated error messages

        Returns:
            True if pickle serialization is allowed

        Raises:
            ValueError: if pickle serialization is not allowed
        """

        # Instance setting takes priority, otherwise fall back to the environment variable (case-sensitive match)
        enablepickle = self.allowpickle or os.environ.get("ALLOW_PICKLE", "False") in ("True", "1")
        if not enablepickle:
            raise ValueError(
                (
                    "Loading of pickled index data is disabled. "
                    f"`{path if path else 'stream'}` was not loaded. "
                    "Set the env variable `ALLOW_PICKLE=True` to enable loading pickled index data. "
                    "This should only be done for trusted and/or local data."
                )
            )

        # Reaching this point without allowpickle means the environment variable enabled pickle, warn the user
        if not self.allowpickle:
            warnings.warn(
                (
                    "Loading of pickled data enabled through `ALLOW_PICKLE=True` env variable. "
                    "This setting should only be used with trusted and/or local data. "
                    "Saving this index will replace pickled index data formats with the latest index formats and remove this warning."
                ),
                RuntimeWarning,
            )

        return enablepickle