# calculate_mirror_size.py
# /// script
# dependencies = [
#   "requests",
#   "bs4",
#   "docopt",
#   "rich",
# ]
# ///

"""calculate_mirror_size.py

Usage:
    calculate_mirror_size.py -H <http_url> -R <rsync_url>
    calculate_mirror_size.py -h

Examples:
    calculate_mirror_size.py -H "https://mirrors.servercentral.com/voidlinux/" -R "rsync://repo-sync.voidlinux.org/voidlinux/"

Options:
    -H <http_url>   HTTP URL of the mirror
    -R <rsync_url>  rsync URL of the mirror
    -h, --help      show this help message and exit
"""

import re
import subprocess
from tempfile import TemporaryDirectory

import requests
from bs4 import BeautifulSoup
from docopt import docopt
from rich.console import Console
from rich.text import Text

# Matches the "Total file size: 1,234,567 bytes" line printed by `rsync --stats`.
_TOTAL_SIZE_RE = re.compile(r"^Total file size:\s*([\d,]+)", re.MULTILINE)


def human_bytes(bites: int) -> str:
    """Format a byte count as a human-readable string using binary units.

    Counts below 1 KiB are printed as whole bytes with the correct
    singular/plural noun ("1 byte", "512 bytes"). Larger counts are scaled
    to KiB, MiB, GiB or TiB and printed with two decimals; anything at or
    above 1 TiB is reported in TiB.

    Args:
        bites: Size in bytes.

    Returns:
        The formatted size, e.g. "1.50 KiB".
    """
    if bites < 1024:
        count = int(bites)
        return f"{count} {'byte' if count == 1 else 'bytes'}"

    size = float(bites)
    for unit in ("KiB", "MiB", "GiB"):
        # Dividing by a power of two is exact in binary floating point, so
        # stepping down unit by unit loses no precision.
        size /= 1024
        if size < 1024:
            return f"{size:.2f} {unit}"
    return f"{size / 1024:.2f} TiB"


def _list_mirror_dirs(index_html: str) -> list[str]:
    """Return the hrefs of the sub-directory links found in a mirror index page.

    A link counts as a sub-directory when its href ends with "/" and does not
    start with "." (which skips "./" and "../"). Anchors without an href are
    ignored.
    """
    soup = BeautifulSoup(index_html, "html.parser")
    return [
        href
        for href in (node.get("href") for node in soup.find_all("a"))
        if href and not href.startswith(".") and href.endswith("/")
    ]


def _rsync_dir_size(rsync_url: str, subdir: str) -> int:
    """Return the total file size in bytes that rsync reports for one directory.

    Runs `rsync -a -n --stats` (a dry run, nothing is transferred) and reads
    the "Total file size" line from its output.

    Raises:
        RuntimeError: If rsync's output contains no "Total file size" line.
    """
    # Normalise the slashes so the joined URL has exactly one between parts.
    source = f"{rsync_url.rstrip('/')}/{subdir.strip('/')}/"
    # The directory names come from a remote HTML page, so pass an argument
    # list instead of a shell string: nothing in them can be run as a command.
    result = subprocess.run(
        ["rsync", "-a", "-n", "--stats", source],
        capture_output=True,
        text=True,
        check=False,
    )
    found = _TOTAL_SIZE_RE.search(result.stdout)
    if found is None:
        raise RuntimeError(
            f"rsync reported no total size for {source} "
            f"(exit code {result.returncode}): {result.stderr.strip()}"
        )
    return int(found.group(1).replace(",", ""))


def main() -> None:
    """Scrape the mirror's HTTP index and sum the rsync-reported directory sizes."""
    args = docopt(__doc__)  # type: ignore
    rsync_url = args["-R"]

    # Drop the scheme ("rsync://") for display; fall back to the whole URL
    # when it has no "//" separator.
    repo_shorthand = Text(rsync_url.split("//", 1)[-1])

    print()
    console = Console()
    with console.status(
        "[bold magenta]Calculating mirror size...", spinner="aesthetic"
    ):
        response = requests.get(args["-H"], timeout=60)
        # Fail on an HTTP error instead of scraping the error page.
        response.raise_for_status()
        mirror_dirs = _list_mirror_dirs(response.text)

        console.log(
            f"Summing up the sizes of each directory in [bold blue]{repo_shorthand}[/bold blue]."
        )
        dir_sizes = []
        # default=0 keeps an index page without sub-directories from raising.
        max_dir_len = max((len(name) for name in mirror_dirs), default=0)
        for mirror_dir in mirror_dirs:
            size = _rsync_dir_size(rsync_url, mirror_dir)
            console.log(mirror_dir.rjust(max_dir_len) + " " + human_bytes(size))
            dir_sizes.append(size)

    console.print(
        f"\n[bold blue]{repo_shorthand}[/bold blue]: " + human_bytes(sum(dir_sizes))
    )


if __name__ == "__main__":
    main()