download_scipy_datasets.py
# Seed the scikit-learn dataset cache from the archives shipped in the
# ``test_data`` directory, then call the matching ``datasets.fetch_*`` loaders.
import os
import shutil
import zipfile
from pathlib import Path

from sklearn import datasets

data_home = datasets.get_data_home()
print("Path of download datasets to: ", data_home)

print("Unzip LFW people dataset...")

# ``root_dir`` is the directory two levels above the folder containing this
# script; the bundled archives are read from its ``test_data`` subdirectory.
root_dir = Path(__file__).resolve().parents[2]
test_data_dir = root_dir / "test_data"


def _announce_and_copy(banner, archive_name):
    """Print *banner*, then copy ``test_data/<archive_name>`` into the cache."""
    print(banner)
    shutil.copy(test_data_dir / archive_name, data_home)


lfw_zip_path = test_data_dir / "lfw-dataset.zip"
lfw_home_path = Path(data_home) / "lfw_home"
# Extraction is skipped whenever an ``lfw_home`` entry already exists in the
# cache directory.
if not lfw_home_path.exists():
    with zipfile.ZipFile(lfw_zip_path, "r") as zip_ref:
        zip_ref.extractall(data_home)

datasets.fetch_lfw_people()

# Each archive is copied into the cache directory right before its loader is
# called -- presumably so the loader picks up the local copy; confirm against
# the scikit-learn version in use.
_announce_and_copy("Copying 20 news group dataset...", "20news-bydate_py3.pkz")
datasets.fetch_20newsgroups()

_announce_and_copy("Copying California Housing...", "cal_housing_py3.pkz")
datasets.fetch_california_housing()

print("Download completed.")
print(f"Content of datasets cache: {os.listdir(data_home)}")