/ calculate_mirror_size.py
calculate_mirror_size.py
 1  # /// script
 2  # dependencies = [
 3  #   "requests",
 4  #   "bs4",
 5  #   "docopt",
 6  #   "rich",
 7  # ]
 8  # ///
 9  
10  """calculate_mirror_size.py
11  
12  Usage:
13      calculate_mirror_size.py -H <http_url> -R <rsync_url>
14      calculate_mirror_size.py -h
15  
16  Examples:
17      calculate_mirror_size.py -H "https://mirrors.servercentral.com/voidlinux/" -R "rsync://repo-sync.voidlinux.org/voidlinux/"
18  
19  Options:
20      -H <http_url>   HTTP URL of the mirror
21      -R <rsync_url>  rsync URL of the mirror
22      -h, --help      show this help message and exit
23  """
24  
25  import subprocess
26  from tempfile import TemporaryDirectory
27  
28  import requests
29  from bs4 import BeautifulSoup
30  from docopt import docopt
31  from rich.console import Console
32  from rich.text import Text
33  
34  
def human_bytes(bites: int) -> str:
    """Format a byte count as a human-readable string using binary units.

    Args:
        bites: Number of bytes.

    Returns:
        Values below 1024 as a plain count followed by "byte" or "bytes"
        (e.g. "1 byte", "512 bytes"); larger values with two decimals in
        KiB, MiB, GiB or TiB (e.g. "1.50 KiB"). TiB is the largest unit, so
        anything beyond it is still expressed in TiB.
    """
    if bites < 1024:
        # Only exactly one is singular; zero and every other count are plural.
        return f"{bites} {'byte' if bites == 1 else 'bytes'}"

    value = float(bites)
    # Step up one unit at a time until the value fits below 1024 of that unit.
    for unit in ("KiB", "MiB", "GiB"):
        value /= 1024
        if value < 1024:
            return f"{value:.2f} {unit}"
    return f"{value / 1024:.2f} TiB"
55  
56  
def _list_mirror_dirs(http_url: str) -> list[str]:
    """Return the subdirectory links listed on the mirror's HTTP index page.

    A link counts as a subdirectory when its href ends with "/" and does not
    start with "." (which skips "./" and "../" style entries).

    Raises:
        requests.RequestException: If the page cannot be fetched or the server
            answers with an HTTP error status.
    """
    response = requests.get(http_url, timeout=60)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    # href=True skips anchors without an href attribute; for those,
    # node.get("href") would be None and the string checks below would fail.
    hrefs = [node["href"] for node in soup.find_all("a", href=True)]
    return [h for h in hrefs if h.endswith("/") and not h.startswith(".")]


def _rsync_total_size(rsync_url: str) -> int:
    """Return the "Total file size" in bytes that an rsync dry run reports.

    Raises:
        OSError: If the rsync executable cannot be started.
        RuntimeError: If rsync's output has no parsable "Total file size" line.
    """
    # Pass an argument list instead of a shell string: the directory part of
    # the URL is scraped from a remote page and must never reach a shell.
    result = subprocess.run(
        ["rsync", "-a", "-n", "--stats", rsync_url],
        capture_output=True,
        text=True,
        errors="replace",
        # The exit status is deliberately not enforced; what matters is
        # whether the stats line was printed.
        check=False,
    )
    for line in result.stdout.splitlines():
        if not line.startswith("Total file size"):
            continue
        # Expected shape: "Total file size: 1,234,567 bytes".
        try:
            return int(line.split()[3].replace(",", ""))
        except (IndexError, ValueError) as err:
            raise RuntimeError(f"unexpected rsync stats line: {line!r}") from err
    raise RuntimeError(
        f"rsync reported no total size for {rsync_url}: {result.stderr.strip()}"
    )


def main() -> None:
    """Sum the size of every top-level directory of a mirror and print it.

    The directory names come from the HTTP index page (-H); each directory's
    size comes from an rsync dry run against the rsync URL (-R).
    """
    args = docopt(__doc__)  # type: ignore

    # Drop trailing slashes so joining with a directory name yields one "/".
    rsync_base = args["-R"].rstrip("/")
    # Display name: the rsync URL without its scheme; fall back to the whole
    # argument when it contains no "//".
    repo_shorthand = Text(args["-R"].partition("//")[2] or args["-R"])

    print()
    console = Console()
    dir_sizes = []
    try:
        with console.status(
            "[bold magenta]Calculating mirror size...", spinner="aesthetic"
        ):
            mirror_dirs = _list_mirror_dirs(args["-H"])
            if not mirror_dirs:
                raise RuntimeError(f"no subdirectories found at {args['-H']}")

            console.log(
                f"Summing up the sizes of each directory in [bold blue]{repo_shorthand}[/bold blue]."
            )
            # Right-align directory names to the longest one in the log.
            max_dir_len = max(len(name) for name in mirror_dirs)
            for name in mirror_dirs:
                size = _rsync_total_size(f"{rsync_base}/{name.lstrip('/')}")
                console.log(name.rjust(max_dir_len) + " " + human_bytes(size))
                dir_sizes.append(size)
    except (requests.RequestException, OSError, RuntimeError) as err:
        # Exit with a short message instead of a traceback.
        raise SystemExit(f"error: {err}") from err

    console.print(
        f"\n[bold blue]{repo_shorthand}[/bold blue]: " + human_bytes(sum(dir_sizes))
    )


if __name__ == "__main__":
    main()