// GradientDescent.java
1 import java.io.IOException; 2 import java.util.Arrays; 3 4 /** 5 * Created by MichaelBick on 7/28/15. 6 * first dimension of inputs is the data point, second dimension is the feature number 7 */ 8 public class GradientDescent { 9 private Features features; 10 11 double[][] train; 12 double[] trainActual; 13 14 private double[] mean; 15 private double[] stdDev; 16 private double actualMean; 17 private double actualStdDev; 18 19 public GradientDescent(Symbol[] trainStocks, int NUM_POINTS, int DAYS_BACK, int FUTURE_DAYS) throws IOException { 20 features = new Features(); 21 22 train = getData(trainStocks, NUM_POINTS, DAYS_BACK); 23 trainActual = getActual(trainStocks, NUM_POINTS, DAYS_BACK, FUTURE_DAYS); 24 25 // Get data mean and standard deviation 26 mean = getMean(train); 27 stdDev = getStdDev(train); 28 29 // Normalize training data 30 train = normalize(train); 31 32 // Get actual mean and standard deviation 33 actualMean = getMean(trainActual); 34 actualStdDev = getStdDev(trainActual); 35 36 // Normalize training actuals 37 trainActual = normalize(trainActual); 38 } 39 40 public double[] normalize(double[] data){ 41 double[] normalizedData = data; 42 43 //Get mean, stdDev, etc for normalization calculations 44 int size = data.length; 45 46 for (int i = 0; i < size; i++){ 47 normalizedData[i] = (data[i] - actualMean) / actualStdDev; 48 } 49 50 return normalizedData; 51 } 52 53 public double[][] normalize(double[][] data){ 54 double[][] normalizedData = data; 55 56 //Get mean, stdDev, etc for normalization calculations 57 int numFeatures = data[0].length; 58 59 for (int feature = 1; feature < numFeatures; feature ++){ 60 for (int point = 0; point < data.length; point ++){ 61 normalizedData[point][feature] = (data[point][feature] - mean[feature]) / stdDev[feature]; 62 } 63 } 64 return normalizedData; 65 } 66 67 public static double getMean(double[] data) { 68 double sum = 0; 69 70 // Calculate the mean 71 for (double point : data) { 72 sum += point; 73 } 74 75 
return sum / data.length; 76 } 77 78 public static double[] getMean(double[][] data) { 79 int numFeatures = data[0].length; 80 81 double[] means = new double[numFeatures]; 82 83 // Calculate the mean 84 for (int i = 0; i < numFeatures; i++) { 85 // Sum the values for each feature 86 for (double[] point : data) { 87 means[i] += point[i]; 88 } 89 90 // Divide by the amount of data points 91 means[i] /= data.length; 92 } 93 94 return means; 95 } 96 97 private static double getVariance(double[] data) { 98 double variance = 0; 99 100 double mean = getMean(data); 101 102 for (double point : data) { 103 variance += Math.pow(point - mean, 2); 104 } 105 106 variance /= data.length; 107 108 return variance; 109 } 110 111 private static double[] getVariance(double[][] data) { 112 int numFeatures = data[0].length; 113 114 double[] variance = new double[numFeatures]; 115 116 double[] mean = getMean(data); 117 118 // Calculate the variance for each feature 119 for (int i = 0; i < numFeatures; i++) { 120 // Sum the squares of the difference from mean 121 for (double[] point : data) { 122 variance[i] += Math.pow((point[i] - mean[i]), 2); 123 } 124 125 // Divide by the amount of data points 126 variance[i] /= data.length; 127 } 128 129 return variance; 130 } 131 132 public static double getStdDev(double[] data) { 133 return Math.sqrt(getVariance(data)); 134 } 135 136 public static double[] getStdDev(double[][] data) { 137 // Set the standard deviations to the variances 138 double[] StdDev = getVariance(data); 139 140 // Now square root the variances 141 for (double variance : StdDev) { 142 variance = Math.sqrt(variance); 143 } 144 145 return StdDev; 146 } 147 148 // Get set of data with more recent data first 149 public double[][] getData(Symbol[] stocks, int size, int daysAgo) throws IOException { 150 int numStocks = stocks.length; 151 152 double[][] data = new double[numStocks * size][features.getFeatures(stocks[0], 0).length]; 153 154 for (int i = 0; i < numStocks; i++) { 155 
for (int j = 0; j < size; j++) { 156 data[(i * size) + j] = features.getFeatures(stocks[i], j + daysAgo); 157 } 158 } 159 160 return data; 161 } 162 163 // Get set of actuals with most recent actuals first 164 public static double[] getActual(Symbol[] stocks, int size, int daysAgo, int futureDays) throws IOException { 165 int numStocks = stocks.length; 166 167 double[] actuals = new double[numStocks * size]; 168 169 for (int i = 0; i < numStocks; i++) { 170 for (int j = 0; j < size; j++) { 171 actuals[(i * size) + j] = stocks[i].getAdjClose(j + daysAgo - futureDays).doubleValue(); 172 } 173 } 174 175 return actuals; 176 } 177 178 // first array of data in data, second is features 179 private static double[] getPredictions(double[] coef, double[][] data, double mean, double stdDev) { 180 int NUM_DATA = data.length; 181 int NUM_FEATURES = coef.length; 182 183 double[] predictions = new double[NUM_DATA]; 184 185 for (int j = 0; j < NUM_DATA; j++) { 186 // multiply each feature of data by its weight, sum, and then put in predictions 187 for (int k = 0; k < NUM_FEATURES; k++) { 188 // Calculate prediction using linear regression function 189 predictions[j] += data[j][k] * coef[k]; 190 } 191 192 // Un-normalize the prediction 193 predictions[j] = (predictions[j] * stdDev) + mean; 194 } 195 196 return predictions; 197 } 198 199 public double[] getPredictions(double[] coef, double[][] data) { 200 return getPredictions(coef, data, actualMean, actualStdDev); 201 } 202 203 public double getCost(double[] theta) { 204 int size = trainActual.length; 205 206 double[] predictions = getPredictions(theta, train, 0, 1); 207 208 double sumErrors = 0; 209 210 for (int i = 0; i < size; i++) { 211 sumErrors += Math.pow(predictions[i] - trainActual[i], 2); 212 } 213 214 return (1.0 / (2 * size)) * sumErrors; 215 } 216 217 public double[] train(double alpha, int numIters) { 218 double[] theta = new double[train[0].length]; 219 220 int m = trainActual.length; 221 int numFeatures = 
train[0].length; 222 223 for (int i = 0; i < numIters; i++) { 224 // Calculate predictions 225 double[] predictions = getPredictions(theta, train, 0.0, 1.0); 226 227 // Calculate error 228 double[] errorSums = new double[numFeatures]; 229 230 for (int j = 0; j < numFeatures; j++) { 231 for (int k = 0; k < m; k++) { 232 errorSums[j] += (predictions[k] - trainActual[k]) * train[k][j]; 233 } 234 } 235 236 for (int j = 0; j < numFeatures; j++) { 237 theta[j] -= alpha * (1.0 / m) * errorSums[j]; 238 } 239 240 // System.out.println(Arrays.toString(theta)); 241 System.out.println(getCost(theta)); 242 } 243 244 return theta; 245 } 246 }