/ .github / scripts / download_scipy_datasets.py
download_scipy_datasets.py
 1  import os
 2  import shutil
 3  import zipfile
 4  from pathlib import Path
 5  
 6  from sklearn import datasets
 7  
 8  data_home = datasets.get_data_home()
 9  print("Path of download datasets to: ", data_home)
10  
11  # print("Download California Housing dataset...")
12  # datasets.fetch_california_housing()
13  print("Unzip LFW people dataset...")
14  
15  root_dir = Path(__file__).resolve().parents[2]
16  lfw_zip_path = root_dir / "test_data" / "lfw-dataset.zip"
17  lfw_home_path = Path(data_home) / "lfw_home"
18  if not lfw_home_path.exists():
19      with zipfile.ZipFile(lfw_zip_path, "r") as zip_ref:
20          zip_ref.extractall(data_home)
21  
22  
23  datasets.fetch_lfw_people()
24  print("Copying 20 news group dataset...")
25  shutil.copy(root_dir / "test_data" / "20news-bydate_py3.pkz", data_home)
26  datasets.fetch_20newsgroups()
27  print("Copying California Housing...")
28  shutil.copy(root_dir / "test_data" / "cal_housing_py3.pkz", data_home)
29  datasets.fetch_california_housing()
30  print("Download completed.")
31  print(f"Content of datasets cache: {os.listdir(data_home)}")