/ src / python / txtai / archive / tar.py
tar.py
 1  """
 2  Tar module
 3  """
 4  
 5  import os
 6  import tarfile
 7  
 8  from .compress import Compress
 9  
10  
class Tar(Compress):
    """
    Tar compression

    Packs a path into a tar archive and unpacks a tar archive into a directory. The compression
    type (bz2, gz, xz or none) is inferred from the extension of the archive file name.
    """

    def pack(self, path, output):
        """
        Creates a tar archive at output with the content found at path.

        Args:
            path: path to add to the archive, content is stored under the archive root (".")
            output: archive file to write, its extension selects the compression type
        """

        # Infer compression type
        compression = self.compression(output)

        # Fall back to an uncompressed archive when the extension is not a supported compression type
        with tarfile.open(output, f"w:{compression}" if compression else "w") as tar:
            tar.add(path, arcname=".")

    def unpack(self, path, output):
        """
        Extracts the tar archive at path into the output directory.

        All entries are checked before anything is extracted. Extraction only starts once every
        entry has passed validation.

        Args:
            path: archive file to read, its extension selects the compression type
            output: directory to extract archive content into

        Raises:
            IOError: if an entry is a symbolic link, a hard link or fails the base directory check
        """

        # Infer compression type
        compression = self.compression(path)

        with tarfile.open(path, f"r:{compression}" if compression else "r") as tar:
            # Validate paths
            for member in tar.getmembers():
                # Entries are written relative to output, so output is the base directory each entry
                # has to stay within (not the archive file itself)
                fullpath = os.path.join(output, member.name)

                # Reject paths outside of base directory and links
                # NOTE: validate is inherited from Compress, which is defined outside of this module
                if not self.validate(output, fullpath) or member.issym() or member.islnk():
                    raise IOError(f"Invalid tar entry: {member.name}{'->' + member.linkname if member.linkname else ''}")

            # Unpack data. Apply default data filter to only allow basic TAR features.
            # The filter argument is only passed when this Python version's tarfile module supports it.
            kwargs = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}
            tar.extractall(output, **kwargs)

    def compression(self, path):
        """
        Gets compression type for path.

        Args:
            path: path to file

        Returns:
            compression type, one of "bz2", "gz" or "xz", None if the path does not end with one of those
        """

        # Infer compression type from last path component. Limit to supported types.
        compression = path.lower().split(".")[-1]
        return compression if compression in ("bz2", "gz", "xz") else None