// src/GradientDescent.java
  1  import java.io.IOException;
  2  import java.util.Arrays;
  3  
  4  /**
  5   * Created by MichaelBick on 7/28/15.
  6   * first dimension of inputs is the data point, second dimension is the feature number
  7   */
  8  public class GradientDescent {
  9      private Features features;
 10      
 11      double[][] train;
 12      double[] trainActual;
 13      
 14      private double[] mean;
 15      private double[] stdDev;
 16      private double actualMean;
 17      private double actualStdDev;
 18      
 19      public GradientDescent(Symbol[] trainStocks, int NUM_POINTS, int DAYS_BACK, int FUTURE_DAYS) throws IOException {
 20      	features = new Features();
 21      	
 22      	 train = getData(trainStocks, NUM_POINTS, DAYS_BACK);
 23           trainActual = getActual(trainStocks, NUM_POINTS, DAYS_BACK, FUTURE_DAYS);
 24           
 25           // Get data mean and standard deviation
 26           mean = getMean(train);
 27           stdDev = getStdDev(train);
 28           
 29           // Normalize training data
 30           train = normalize(train);
 31  
 32           // Get actual mean and standard deviation
 33           actualMean = getMean(trainActual);
 34           actualStdDev = getStdDev(trainActual);
 35  
 36           // Normalize training actuals
 37           trainActual = normalize(trainActual);
 38      }
 39      
 40  	public double[] normalize(double[] data){
 41  		double[] normalizedData = data;
 42  
 43  		//Get mean, stdDev, etc for normalization calculations
 44  		int size = data.length;
 45  
 46  		for (int i = 0; i < size; i++){
 47  			normalizedData[i] = (data[i] - actualMean) / actualStdDev;
 48  		}
 49  
 50  		return normalizedData;
 51  	}
 52  
 53  	public double[][] normalize(double[][] data){
 54  		double[][] normalizedData = data;
 55  		
 56  		//Get mean, stdDev, etc for normalization calculations
 57  		int numFeatures = data[0].length;
 58  		
 59  		for (int feature = 1; feature < numFeatures; feature ++){
 60  			for (int point = 0; point < data.length; point ++){
 61  				normalizedData[point][feature] = (data[point][feature] - mean[feature]) / stdDev[feature];
 62  			}
 63  		}
 64  		return normalizedData; 
 65  	}
 66  
 67  	public static double getMean(double[] data) {
 68  		double sum = 0;
 69  
 70  		// Calculate the mean
 71  		for (double point : data) {
 72  			sum += point;
 73  		}
 74  
 75  		return sum / data.length;
 76  	}
 77  	
 78  	public static double[] getMean(double[][] data) {
 79  		int numFeatures = data[0].length;
 80  		
 81  		double[] means = new double[numFeatures];
 82  		
 83  		// Calculate the mean
 84  		for (int i = 0; i < numFeatures; i++) {
 85  			// Sum the values for each feature
 86  			for (double[] point : data) {
 87  				means[i] += point[i];
 88  			}
 89  			
 90  			// Divide by the amount of data points
 91  			means[i] /= data.length;
 92  		}
 93  		
 94  		return means;
 95  	}
 96  
 97  	private static double getVariance(double[] data) {
 98  		double variance = 0;
 99  
100  		double mean = getMean(data);
101  
102  		for (double point : data) {
103  			variance += Math.pow(point - mean, 2);
104  		}
105  
106  		variance /= data.length;
107  
108  		return variance;
109  	}
110  
111  	private static double[] getVariance(double[][] data) {
112  		int numFeatures = data[0].length;
113  		
114  		double[] variance = new double[numFeatures];
115  		
116  		double[] mean = getMean(data);
117  		
118  		// Calculate the variance for each feature
119  		for (int i = 0; i < numFeatures; i++) {
120  			// Sum the squares of the difference from mean
121  			for (double[] point : data) {
122  				variance[i] += Math.pow((point[i] - mean[i]), 2);
123  			}
124  			
125  			// Divide by the amount of data points
126  			variance[i] /= data.length;
127  		}
128  		
129  		return variance;
130  	}
131  
132  	public static double getStdDev(double[] data) {
133  		return Math.sqrt(getVariance(data));
134  	}
135  
136  	public static double[] getStdDev(double[][] data) {
137  		// Set the standard deviations to the variances
138  		double[] StdDev = getVariance(data);
139  
140  		// Now square root the variances
141  		for (double variance : StdDev) {
142  			variance = Math.sqrt(variance);
143  		}
144  
145  		return StdDev;
146  	}
147  	
148  	// Get set of data with more recent data first
149     public double[][] getData(Symbol[] stocks, int size, int daysAgo) throws IOException {
150          int numStocks = stocks.length;
151  
152          double[][] data = new double[numStocks * size][features.getFeatures(stocks[0], 0).length];
153  
154          for (int i = 0; i < numStocks; i++) {
155              for (int j = 0; j < size; j++) {
156                  data[(i * size) + j] = features.getFeatures(stocks[i], j + daysAgo);
157              }
158          }
159  
160          return data;
161      }
162  
163      // Get set of actuals with most recent actuals first
164      public static double[] getActual(Symbol[] stocks, int size, int daysAgo, int futureDays) throws IOException {
165          int numStocks = stocks.length;
166  
167          double[] actuals = new double[numStocks * size];
168  
169          for (int i = 0; i < numStocks; i++) {
170              for (int j = 0; j < size; j++) {
171                  actuals[(i * size) + j] = stocks[i].getAdjClose(j + daysAgo - futureDays).doubleValue();
172              }
173          }
174  
175          return actuals;
176      }
177  	
178  	// first array of data in data, second is features
179  	private static double[] getPredictions(double[] coef, double[][] data, double mean, double stdDev) {
180      	int NUM_DATA = data.length;
181      	int NUM_FEATURES = coef.length;
182  		
183  		double[] predictions = new double[NUM_DATA];
184      	
185      	for (int j = 0; j < NUM_DATA; j++) {
186  			// multiply each feature of data by its weight, sum, and then put in predictions
187  			for (int k = 0; k < NUM_FEATURES; k++) {
188  				// Calculate prediction using linear regression function
189  				predictions[j] += data[j][k] * coef[k];
190  			}
191  
192  			// Un-normalize the prediction
193  			predictions[j] = (predictions[j] * stdDev) + mean;
194  		}
195      	
196      	return predictions;
197  	}
198  	
199  	public double[] getPredictions(double[] coef, double[][] data) {
200  		return getPredictions(coef, data, actualMean, actualStdDev);
201  	}
202  	
203  	public double getCost(double[] theta) {
204          int size = trainActual.length;
205  
206          double[] predictions = getPredictions(theta, train, 0, 1);
207  
208          double sumErrors = 0;
209  
210          for (int i = 0; i < size; i++) {
211              sumErrors += Math.pow(predictions[i] - trainActual[i], 2);
212          }
213  
214          return (1.0 / (2 * size)) * sumErrors;
215      }
216  	
217      public double[] train(double alpha, int numIters) {
218      	double[] theta = new double[train[0].length];
219      	
220      	int m = trainActual.length;
221          int numFeatures = train[0].length;
222          
223          for (int i = 0; i < numIters; i++) {
224          	// Calculate predictions
225          	double[] predictions = getPredictions(theta, train, 0.0, 1.0);
226          	
227          	// Calculate error
228          	double[] errorSums = new double[numFeatures];
229          	
230          	for (int j = 0; j < numFeatures; j++) {
231          		for (int k = 0; k < m; k++) {
232          			errorSums[j] += (predictions[k] - trainActual[k]) * train[k][j];
233          		}
234          	}
235          	
236          	for (int j = 0; j < numFeatures; j++) {
237          		theta[j] -= alpha * (1.0 / m) * errorSums[j];
238          	}
239          	
240          	// System.out.println(Arrays.toString(theta));
241          	System.out.println(getCost(theta));
242          }
243          
244          return theta;
245      }
246  }