/ inputs_processing / create_npys_from_sysu.py
create_npys_from_sysu.py
  1  import os
  2  import numpy as np
  3  from skimage import io
  4  import pandas as pd
  5  from scipy.ndimage import zoom
  6  import glob
  7  from pandas import DataFrame
  8  
  9  
 10  
 11  #### Images locations  ####
 12  IMAGES_PATH = '/home/dvalsamis/Documents/data/sysu/original_set/total/'
 13  TRAIN_LABEL_PATH = '/home/dvalsamis/Documents/data/sysu/original_set/train/label/'
 14  TEST_LABEL_PATH = '/home/dvalsamis/Documents/data/sysu/original_set/test/label/'
 15  
 16  #### Save targets for the npy files ####
 17  SAVE_IMAGES = '/home/dvalsamis/Documents/data/sysu/SYSU_NPY/total_NPY/'
 18  SAVE_TRAIN_LABELS = '/home/dvalsamis/Documents/data/sysu/SYSU_NPY/train_labels_NPY/'
 19  SAVE_TEST_LABELS = '/home/dvalsamis/Documents/data/sysu/SYSU_NPY/test_labels_NPY/'
 20  
 21  images_set = pd.DataFrame(columns=['pair1','pair2','name'])
 22  train_set = pd.DataFrame(columns=['pair1','pair2','change_mask'])
 23  test_set = pd.DataFrame(columns=['pair1','pair2','change_mask'])
 24  
 25  
 26  #Method to save the change masks to npy files
 27  def save_labels(path,folder,prefix, target, index, train=False, test=False):
 28      cm = read_changemask(path+folder)
 29      if train:
 30          train_set.loc[index,'pair1'] = str(prefix)+"_a.npy"
 31          train_set.loc[index,'pair2'] = str(prefix)+"_b.npy"
 32          train_set.loc[index,'change_mask'] = str(prefix)+"_cm.npy"
 33      if test:
 34          test_set.loc[index,'pair1'] = str(prefix)+"_a.npy"
 35          test_set.loc[index,'pair2'] = str(prefix)+"_b.npy"
 36          test_set.loc[index,'change_mask'] = str(prefix)+"_cm.npy"
 37      np.save(target + str(prefix)+"_cm.npy",cm)
 38  
 39  
 40  # final method that saves the images in a npy format
 41  def save_image(path_to_images, prefix, suffix, path_to_target):
 42      image_to_save = read_single_png_image(path_to_images)
 43      np.save(path_to_target + str(prefix) +"_"+ str(suffix) + ".npy", image_to_save)
 44      
 45      return image_to_save
 46      
 47  
 48  
 49  def adjust_shape(I, s):
 50      """Adjust shape of grayscale image I to s."""
 51      # crop if necesary  
 52      I = I[:s[0],:s[1]]
 53      si = I.shape
 54  
 55      # pad if necessary 
 56      p0 = max(0,s[0] - si[0])
 57      p1 = max(0,s[1] - si[1])
 58  
 59      return np.pad(I,((0,p0),(0,p1)),'edge')
 60  
 61  
 62  
 63  def read_single_png_image(path):
 64      """Read a .png file, either directly from the path or from a directory."""
 65      if os.path.isdir(path):
 66          # If the path is a directory, find the first .png file in it
 67          files = [f for f in os.listdir(path) if f.endswith('.png')]
 68          if not files:
 69              raise FileNotFoundError("No PNG files found in the directory.")
 70          file_path = os.path.join(path, files[0])  # First .png file
 71      elif os.path.isfile(path) and path.endswith('.png'):
 72          # If the path is directly a .png file
 73          file_path = path
 74      else:
 75          raise FileNotFoundError("No PNG file found at the provided path.")
 76  
 77      print(f"Loading image: {file_path}")
 78      
 79      # Load the image using skimage.io.imread
 80      image = io.imread(file_path)
 81      
 82      return image
 83  
 84  
 85  def read_changemask(cm_path):
 86      """Read change mask from PNG files, convert values from [0, 255] to [0, 1]. Ensure the file exists."""
 87      cm_files = glob.glob(cm_path)  # Fetch all PNG files in directory
 88      if not cm_files:
 89          raise FileNotFoundError(f"No mask files found at {cm_path}")
 90      
 91      cm = io.imread(cm_files[0], as_gray=True)  # Safely read the first file, if it exists
 92      cm_normalized = (cm / 255.0).astype(int)  # Normalize to [0, 1] and convert to integer
 93      
 94      return cm_normalized
 95  
 96  def make_image_pairs(images_path, save_images):
 97      folder_a = os.path.join(images_path, 'time1')
 98      folder_b = os.path.join(images_path, 'time2')
 99      
100      images_a = [f for f in os.listdir(folder_a) if f.endswith('.png')]
101      images_b = set(os.listdir(folder_b))  # Use a set for quick lookup
102  
103      images_set = pd.DataFrame(columns=['name', 'pair1', 'pair2'])
104  
105      for i, filename in enumerate(images_a):
106          if filename in images_b:  # Check if the corresponding image exists in folder 'B'
107              path1 = os.path.join(folder_a, filename)
108              path2 = os.path.join(folder_b, filename)
109              print(f"Processing pair {filename}:")
110              
111              img1 = save_image(path1, i, "a", save_images)
112              img2 = save_image(path2, i, "b", save_images)
113  
114              # Store information in the DataFrame
115              images_set.loc[i] = {'name': filename, 'pair1': f"{i}_a.npy", 'pair2': f"{i}_b.npy"}
116  
117      print("Done with image pairs!")
118      print("Saving dataframe")
119      images_set.to_csv(os.path.join(save_images, 'Sysu_set.csv'), index=False)
120  
# Step 1: convert all time1/time2 image pairs and write Sysu_set.csv.
make_image_pairs(IMAGES_PATH, SAVE_IMAGES)

# Step 2: convert the change masks of the train and test splits.
# make_image_pairs numbers the '.png' entries of time1 in os.listdir order, so
# the same listing and the same filter are used here. Filtering on "not .txt"
# instead would shift every index after a stray non-PNG entry and attach
# "<i>_cm.npy" to the wrong "<i>_a.npy" / "<i>_b.npy" pair.
path1 = os.path.join(IMAGES_PATH, 'time1')
folder_list = [f for f in os.listdir(path1) if f.endswith('.png')]

train_label_list = [f for f in os.listdir(TRAIN_LABEL_PATH) if not f.endswith('.txt')]
train_label_names = set(train_label_list)  # set lookup replaces the inner loop

pos = 0  # consecutive row number inside train_set
for i, image_name in enumerate(folder_list):
    if image_name in train_label_names:
        save_labels(TRAIN_LABEL_PATH, image_name, i, SAVE_TRAIN_LABELS, pos, train=True)
        pos += 1
print("DONE with train set!")
print("Saving dataframe")
train_set.to_csv(os.path.join(SAVE_TRAIN_LABELS, 'train_set.csv'), index=False)


test_label_list = os.listdir(TEST_LABEL_PATH)
test_label_names = set(test_label_list)

pos = 0  # consecutive row number inside test_set
for i, image_name in enumerate(folder_list):
    if image_name in test_label_names:
        save_labels(TEST_LABEL_PATH, image_name, i, SAVE_TEST_LABELS, pos, train=False, test=True)
        pos += 1

print("DONE with test set!")
print("Saving dataframe")
test_set.to_csv(os.path.join(SAVE_TEST_LABELS, 'test_set.csv'), index=False)

print("DONE")
153  
154  print("DONE")