# inputs_processing/create_npys_from_sysu.py
# create_npys_from_sysu.py
"""Convert the SYSU change-detection dataset from PNG files to ``.npy`` arrays.

Running this script performs three steps, in order:

1. Every image present in both ``time1`` and ``time2`` under ``IMAGES_PATH``
   is saved as ``<i>_a.npy`` / ``<i>_b.npy`` in ``SAVE_IMAGES`` and listed in
   ``Sysu_set.csv``.
2. For every saved pair that has a train label, the change mask is saved as
   ``<i>_cm.npy`` in ``SAVE_TRAIN_LABELS`` and listed in ``train_set.csv``.
3. The same is done for the test labels (``SAVE_TEST_LABELS``,
   ``test_set.csv``).

The index ``<i>`` is shared by all three steps, so a row of ``train_set.csv``
or ``test_set.csv`` always refers to image files written in step 1.
"""

import glob
import os

import numpy as np
import pandas as pd
from pandas import DataFrame
from scipy.ndimage import zoom
from skimage import io


#### Images locations ####
IMAGES_PATH = '/home/dvalsamis/Documents/data/sysu/original_set/total/'
TRAIN_LABEL_PATH = '/home/dvalsamis/Documents/data/sysu/original_set/train/label/'
TEST_LABEL_PATH = '/home/dvalsamis/Documents/data/sysu/original_set/test/label/'

#### Save targets for the npy files ####
SAVE_IMAGES = '/home/dvalsamis/Documents/data/sysu/SYSU_NPY/total_NPY/'
SAVE_TRAIN_LABELS = '/home/dvalsamis/Documents/data/sysu/SYSU_NPY/train_labels_NPY/'
SAVE_TEST_LABELS = '/home/dvalsamis/Documents/data/sysu/SYSU_NPY/test_labels_NPY/'

# Module-level tables; train_set / test_set are filled in by save_labels().
images_set = pd.DataFrame(columns=['pair1', 'pair2', 'name'])
train_set = pd.DataFrame(columns=['pair1', 'pair2', 'change_mask'])
test_set = pd.DataFrame(columns=['pair1', 'pair2', 'change_mask'])


def _register_pair(table, index, prefix):
    """Write the three file names of pair ``prefix`` into row ``index`` of ``table``."""
    table.loc[index, 'pair1'] = f"{prefix}_a.npy"
    table.loc[index, 'pair2'] = f"{prefix}_b.npy"
    table.loc[index, 'change_mask'] = f"{prefix}_cm.npy"


def save_labels(path, folder, prefix, target, index, train=False, test=False):
    """Save one change mask as ``<prefix>_cm.npy`` and register it.

    Args:
        path: Directory that contains the label file.
        folder: Name of the label file inside ``path`` (it is passed on to
            ``read_changemask`` and may therefore also be a glob pattern).
        prefix: Index of the image pair the mask belongs to.
        target: Directory the ``.npy`` file is written to.
        index: Row of ``train_set`` / ``test_set`` to fill.
        train: If true, register the pair in the global ``train_set``.
        test: If true, register the pair in the global ``test_set``.

    Raises:
        FileNotFoundError: If no label file matches ``path``/``folder``.
    """
    cm = read_changemask(os.path.join(path, folder))
    if train:
        _register_pair(train_set, index, prefix)
    if test:
        _register_pair(test_set, index, prefix)
    np.save(os.path.join(target, f"{prefix}_cm.npy"), cm)


def save_image(path_to_images, prefix, suffix, path_to_target):
    """Read a PNG and save it as ``<prefix>_<suffix>.npy`` in ``path_to_target``.

    Args:
        path_to_images: A ``.png`` file, or a directory containing one.
        prefix: Index of the image pair.
        suffix: Tag appended after the index (``"a"`` or ``"b"`` in this script).
        path_to_target: Directory the ``.npy`` file is written to.

    Returns:
        The image array that was saved.
    """
    image_to_save = read_single_png_image(path_to_images)
    np.save(os.path.join(path_to_target, f"{prefix}_{suffix}.npy"), image_to_save)
    return image_to_save


def adjust_shape(I, s):
    """Crop and/or edge-pad image ``I`` so its first two axes have shape ``s``.

    Args:
        I: Array with at least two axes (rows, columns). Any further axes,
            e.g. colour channels, are left untouched.
        s: Target ``(rows, columns)``.

    Returns:
        Array whose first two axes have exactly the shape ``s``.
    """
    # crop if necessary
    I = I[:s[0], :s[1]]
    si = I.shape

    # pad if necessary
    p0 = max(0, s[0] - si[0])
    p1 = max(0, s[1] - si[1])

    # np.pad needs one (before, after) pair per axis; never pad extra axes.
    pad_width = ((0, p0), (0, p1)) + ((0, 0),) * (I.ndim - 2)
    return np.pad(I, pad_width, 'edge')


def read_single_png_image(path):
    """Read a .png file, either directly from the path or from a directory.

    When ``path`` is a directory, the alphabetically first ``.png`` in it is
    used, so the choice does not depend on the file system's listing order.

    Raises:
        FileNotFoundError: If ``path`` is neither a ``.png`` file nor a
            directory containing at least one.
    """
    if os.path.isdir(path):
        candidates = sorted(f for f in os.listdir(path) if f.endswith('.png'))
        if not candidates:
            raise FileNotFoundError("No PNG files found in the directory.")
        file_path = os.path.join(path, candidates[0])
    elif os.path.isfile(path) and path.endswith('.png'):
        file_path = path
    else:
        raise FileNotFoundError("No PNG file found at the provided path.")

    print(f"Loading image: {file_path}")

    return io.imread(file_path)


def _binarize_mask(cm):
    """Turn a mask read by ``io.imread(..., as_gray=True)`` into integer 0/1.

    Integer masks are assumed to use the [0, 255] range and are divided by 255
    and truncated. Float masks that already lie in [0, 1] (per the skimage
    documentation, what ``as_gray=True`` produces for colour images) and
    boolean masks must not be divided again, or every pixel would become 0.
    """
    cm = np.asarray(cm)
    if cm.dtype == bool:
        return cm.astype(int)
    already_unit_range = (
        np.issubdtype(cm.dtype, np.floating) and cm.size > 0 and cm.max() <= 1.0
    )
    if already_unit_range:
        return (cm >= 0.5).astype(int)
    return (cm / 255.0).astype(int)


def read_changemask(cm_path):
    """Read a change mask from a PNG file and return it as an integer 0/1 array.

    Args:
        cm_path: Path or glob pattern; the alphabetically first match is read.

    Raises:
        FileNotFoundError: If nothing matches ``cm_path``.
    """
    cm_files = sorted(glob.glob(cm_path))
    if not cm_files:
        raise FileNotFoundError(f"No mask files found at {cm_path}")

    cm = io.imread(cm_files[0], as_gray=True)
    return _binarize_mask(cm)


def make_image_pairs(images_path, save_images):
    """Save every image found in both ``time1`` and ``time2`` as ``.npy`` files.

    The ``.png`` files of ``images_path/time1`` are enumerated in sorted order.
    A file with index ``i`` that also exists in ``images_path/time2`` is saved
    as ``<i>_a.npy`` (time1) and ``<i>_b.npy`` (time2) in ``save_images``.
    The list of saved pairs is written to ``save_images/Sysu_set.csv``.

    Returns:
        dict mapping each saved file name to its index ``i``, in the order the
        pairs were saved. Use it to give labels the same index as their images.
    """
    folder_a = os.path.join(images_path, 'time1')
    folder_b = os.path.join(images_path, 'time2')

    # Sorted so the numbering is reproducible; os.listdir order is arbitrary.
    images_a = sorted(f for f in os.listdir(folder_a) if f.endswith('.png'))
    images_b = set(os.listdir(folder_b))  # set for O(1) membership tests

    os.makedirs(save_images, exist_ok=True)

    rows = []
    pair_index = {}
    for i, filename in enumerate(images_a):
        if filename not in images_b:
            # No counterpart in time2: nothing to pair, index i stays unused.
            continue
        print(f"Processing pair {filename}:")

        save_image(os.path.join(folder_a, filename), i, "a", save_images)
        save_image(os.path.join(folder_b, filename), i, "b", save_images)

        rows.append({'name': filename, 'pair1': f"{i}_a.npy", 'pair2': f"{i}_b.npy"})
        pair_index[filename] = i

    print("Done with image pairs!")
    print("Saving dataframe")
    pairs = pd.DataFrame(rows, columns=['name', 'pair1', 'pair2'])
    pairs.to_csv(os.path.join(save_images, 'Sysu_set.csv'), index=False)

    return pair_index


def _save_split_labels(pair_index, label_path, target, train=False, test=False):
    """Save the change mask of every saved pair that has a label in ``label_path``.

    Args:
        pair_index: Mapping file name -> pair index from ``make_image_pairs``.
        label_path: Directory holding the label PNGs of one split.
        target: Directory the ``<i>_cm.npy`` files are written to.
        train: Forwarded to ``save_labels``.
        test: Forwarded to ``save_labels``.
    """
    label_names = set(os.listdir(label_path))
    os.makedirs(target, exist_ok=True)

    pos = 0  # next free row of the split's table
    for filename, i in pair_index.items():
        if filename in label_names:
            save_labels(label_path, filename, i, target, pos, train=train, test=test)
            pos += 1


# Step 1: image pairs. Keep the name -> index mapping so the labels below are
# numbered exactly like the images they belong to.
pair_index = make_image_pairs(IMAGES_PATH, SAVE_IMAGES)

# Step 2: train labels.
_save_split_labels(pair_index, TRAIN_LABEL_PATH, SAVE_TRAIN_LABELS, train=True)
print("DONE with train set!")
print("Saving dataframe")
train_set.to_csv(os.path.join(SAVE_TRAIN_LABELS, 'train_set.csv'), index=False)

# Step 3: test labels.
_save_split_labels(pair_index, TEST_LABEL_PATH, SAVE_TEST_LABELS, train=False, test=True)
print("DONE with test set!")
print("Saving dataframe")
test_set.to_csv(os.path.join(SAVE_TEST_LABELS, 'test_set.csv'), index=False)

print("DONE")