/* src/northbridge/intel/gm45/raminit_read_write_training.c */
  1  /* SPDX-License-Identifier: GPL-2.0-only */
  2  
  3  #include <stdint.h>
  4  #include <device/mmio.h>
  5  #include <pc80/mc146818rtc.h>
  6  #include <console/console.h>
  7  #include "gm45.h"
  8  
/* A small set of rank base addresses used as test targets during training. */
typedef struct {
	/* Physical base address of each populated rank. */
	u32 addr[RANKS_PER_CHANNEL];
	/* Number of valid entries in addr[]. */
	unsigned int count;
} address_bunch_t;
 13  
/* Read Training. */
/* Per-channel, per-byte-lane read timing register; byte lanes are laid
   out in reverse order (lane 0 sits at the highest register address). */
#define CxRDTy_MCHBAR(ch, bl)	(0x14b0 + ((ch) * 0x0100) + ((7 - (bl)) * 4))
/* Coarse timing value t: 4 bits at [23:20]. */
#define CxRDTy_T_SHIFT		20
#define CxRDTy_T_MASK		(0xf << CxRDTy_T_SHIFT)
#define CxRDTy_T(t)		(((t) << CxRDTy_T_SHIFT) & CxRDTy_T_MASK)
/* Fine (phase) timing value p: 3 bits at [18:16]. */
#define CxRDTy_P_SHIFT		16
#define CxRDTy_P_MASK		(0x7 << CxRDTy_P_SHIFT)
#define CxRDTy_P(p)		(((p) << CxRDTy_P_SHIFT) & CxRDTy_P_MASK)
/* Test pattern for read training: one u32 per 8-byte step (see
   read_training_test()); 40 entries cover the 320-byte test window. */
static const u32 read_training_schedule[] = {
	0xfefefefe, 0x7f7f7f7f, 0xbebebebe, 0xdfdfdfdf,
	0xeeeeeeee, 0xf7f7f7f7, 0xfafafafa, 0xfdfdfdfd,
	0x00000000, 0x81818181, 0x40404040, 0x21212121,
	0x10101010, 0x09090909, 0x04040404, 0x03030303,
	0x10101010, 0x11111111, 0xeeeeeeee, 0xefefefef,
	0x10101010, 0x11111111, 0xeeeeeeee, 0xefefefef,
	0x10101010, 0xefefefef, 0x10101010, 0xefefefef,
	0x10101010, 0xefefefef, 0x10101010, 0xefefefef,
	0x00000000, 0xffffffff, 0x00000000, 0xffffffff,
	0x00000000, 0xffffffff, 0x00000000, 0x00000000,
};
/* p is a 3-bit sub-phase: 8 p steps make up one t step. */
#define READ_TIMING_P_SHIFT	3
#define READ_TIMING_P_BOUND	(1 << READ_TIMING_P_SHIFT)
/* Valid coarse values are 0..13. */
#define READ_TIMING_T_BOUND	14
/* Read timing as a coarse/fine pair; the effective delay is "t.p". */
typedef struct {
	int t;	/* coarse step */
	int p;	/* fine step (phase); 0..READ_TIMING_P_BOUND-1 when normalized */
} read_timing_t;
 41  static void print_read_timing(const int msg_lvl, const char *const msg,
 42  			      const int lane, const int channel,
 43  			      const read_timing_t *const timing)
 44  {
 45  	printk(msg_lvl, "%sbyte lane %d, ch %d: %d.%d\n",
 46  	       msg, lane, channel, timing->t, timing->p);
 47  }
 48  
 49  static int normalize_read_timing(read_timing_t *const timing)
 50  {
 51  	while (timing->p >= READ_TIMING_P_BOUND) {
 52  		timing->t++;
 53  		timing->p -= READ_TIMING_P_BOUND;
 54  	}
 55  	while (timing->p < 0) {
 56  		timing->t--;
 57  		timing->p += READ_TIMING_P_BOUND;
 58  	}
 59  	if (timing->t < 0) {
 60  		printk(BIOS_WARNING,
 61  		       "Timing underflow during read training.\n");
 62  		timing->t = 0;
 63  		timing->p = 0;
 64  		return -1;
 65  	} else if (timing->t >= READ_TIMING_T_BOUND) {
 66  		printk(BIOS_WARNING,
 67  		       "Timing overflow during read training.\n");
 68  		timing->t = READ_TIMING_T_BOUND - 1;
 69  		timing->p = READ_TIMING_P_BOUND - 1;
 70  		return -1;
 71  	}
 72  	return 0;
 73  }
 74  static int program_read_timing(const int ch, const int lane,
 75  			       read_timing_t *const timing)
 76  {
 77  	if (normalize_read_timing(timing) < 0)
 78  		return -1;
 79  
 80  	u32 reg = mchbar_read32(CxRDTy_MCHBAR(ch, lane));
 81  	reg &= ~(CxRDTy_T_MASK | CxRDTy_P_MASK);
 82  	reg |= CxRDTy_T(timing->t) | CxRDTy_P(timing->p);
 83  	mchbar_write32(CxRDTy_MCHBAR(ch, lane), reg);
 84  
 85  	return 0;
 86  }
 87  /* Returns 1 on success, 0 on failure. */
 88  static int read_training_test(const int channel, const int lane,
 89  			      const address_bunch_t *const addresses)
 90  {
 91  	int i;
 92  
 93  	const int lane_offset = lane & 4;
 94  	const int lane_mask = 0xff << ((lane & ~4) << 3);
 95  
 96  	for (i = 0; i < addresses->count; ++i) {
 97  		unsigned int offset;
 98  		for (offset = lane_offset; offset < 320; offset += 8) {
 99  			const u32 read = read32p(addresses->addr[i] + offset);
100  			const u32 good = read_training_schedule[offset >> 3];
101  			if ((read & lane_mask) != (good & lane_mask))
102  				return 0;
103  		}
104  	}
105  	return 1;
106  }
/*
 * Find the lowest working read timing, starting from the caller-supplied
 * value: first advance t until the pattern test passes, then step one t
 * back and advance p until it passes again.
 * Returns 0 on success, -1 on timing overflow.
 */
static int read_training_find_lower(const int channel, const int lane,
				    const address_bunch_t *const addresses,
				    read_timing_t *const lower)
{
	/* Coarse search for good t. */
	/* NOTE(review): return value ignored here; the only caller in this
	   file starts at t=0/p=0, which cannot fail normalization. */
	program_read_timing(channel, lane, lower);
	while (!read_training_test(channel, lane, addresses)) {
		++lower->t;
		if (program_read_timing(channel, lane, lower) < 0)
			return -1;
	}

	/* Step back, then fine search for good p. */
	if (lower->t <= 0)
		/* Can't step back, zero is good. */
		return 0;

	--lower->t;
	program_read_timing(channel, lane, lower);
	while (!read_training_test(channel, lane, addresses)) {
		++lower->p;
		if (program_read_timing(channel, lane, lower) < 0)
			return -1;
	}

	return 0;
}
/*
 * Find the first failing read timing above the lower bound. The caller
 * passes lower + 1t as a starting point, which must still pass. Advance
 * t until the test fails, step one t back, then advance p until it fails
 * again; on return *upper is just past the last passing timing.
 * Returns 0 on success, -1 on test failure at the start or timing overflow.
 */
static int read_training_find_upper(const int channel, const int lane,
				    const address_bunch_t *const addresses,
				    read_timing_t *const upper)
{
	if (program_read_timing(channel, lane, upper) < 0)
		return -1;
	if (!read_training_test(channel, lane, addresses)) {
		printk(BIOS_WARNING,
		       "Read training failure: limits too narrow.\n");
		return -1;
	}
	/* Coarse search for bad t. */
	do {
		++upper->t;
		if (program_read_timing(channel, lane, upper) < 0)
			return -1;
	} while (read_training_test(channel, lane, addresses));
	/* Fine search for bad p. */
	--upper->t;
	program_read_timing(channel, lane, upper);
	while (read_training_test(channel, lane, addresses)) {
		++upper->p;
		if (program_read_timing(channel, lane, upper) < 0)
			return -1;
	}

	return 0;
}
/*
 * Train one byte lane: find the lowest and the highest passing read
 * timings, then program the mean of the two. A failing lower-bound
 * search is fatal; a failing upper bound only warns (the clamped value
 * is still used for the mean).
 */
static void read_training_per_lane(const int channel, const int lane,
				   const address_bunch_t *const addresses)
{
	read_timing_t lower, upper;

	/* NOTE(review): sets bits 26:25 of the lane's timing register —
	   presumably enabling the timing override; confirm against GM45
	   register documentation. */
	mchbar_setbits32(CxRDTy_MCHBAR(channel, lane), 3 << 25);

	/*** Search lower bound. ***/

	/* Start at zero. */
	lower.t = 0;
	lower.p = 0;
	if (read_training_find_lower(channel, lane, addresses, &lower) < 0)
		die("Read training failure: lower bound.\n");
	print_read_timing(RAM_DEBUG, "Lower bound for ", lane, channel, &lower);

	/*** Search upper bound. ***/

	/* Start at lower + 1t. */
	upper.t = lower.t + 1;
	upper.p = lower.p;
	if (read_training_find_upper(channel, lane, addresses, &upper) < 0)
		/* Overflow on upper edge is not fatal. */
		printk(BIOS_WARNING, "Read training failure: upper bound.\n");
	print_read_timing(RAM_DEBUG, "Upper bound for ", lane, channel, &upper);

	/*** Calculate and program mean value. ***/

	/* Convert both bounds to plain p units (1t == 8p). */
	lower.p += lower.t << READ_TIMING_P_SHIFT;
	upper.p += upper.t << READ_TIMING_P_SHIFT;
	const int mean_p = (lower.p + upper.p) >> 1;
	/* lower becomes the mean value. */
	lower.t = mean_p >> READ_TIMING_P_SHIFT;
	lower.p = mean_p & (READ_TIMING_P_BOUND - 1);
	program_read_timing(channel, lane, &lower);
	/* The prefix is only printed at RAM_DEBUG level; the value itself
	   is always printed at BIOS_DEBUG. */
	printk(RAM_DEBUG, "Final timings for ");
	print_read_timing(BIOS_DEBUG, "", lane, channel, &lower);
}
200  static void perform_read_training(const dimminfo_t *const dimms)
201  {
202  	int ch, i;
203  
204  	FOR_EACH_POPULATED_CHANNEL(dimms, ch) {
205  		address_bunch_t addresses = { { 0, }, 0 };
206  		FOR_EACH_POPULATED_RANK_IN_CHANNEL(dimms, ch, i)
207  			addresses.addr[addresses.count++] =
208  				raminit_get_rank_addr(ch, i);
209  
210  		for (i = 0; i < addresses.count; ++i) {
211  			/* Write test pattern. */
212  			unsigned int offset;
213  			for (offset = 0; offset < 320; offset += 4)
214  				write32p(addresses.addr[i] + offset,
215  					read_training_schedule[offset >> 3]);
216  		}
217  
218  		for (i = 0; i < 8; ++i)
219  			read_training_per_lane(ch, i, &addresses);
220  	}
221  }
222  static void read_training_store_results(void)
223  {
224  	u8 bytes[TOTAL_CHANNELS * 8];
225  	int ch, i;
226  
227  	/* Store one timing pair in one byte each. */
228  	FOR_EACH_CHANNEL(ch) {
229  		for (i = 0; i < 8; ++i) {
230  			const u32 bl_reg = mchbar_read32(CxRDTy_MCHBAR(ch, i));
231  			bytes[(ch * 8) + i] =
232  				(((bl_reg & CxRDTy_T_MASK) >> CxRDTy_T_SHIFT)
233  				 << 4) |
234  				((bl_reg & CxRDTy_P_MASK) >> CxRDTy_P_SHIFT);
235  		}
236  	}
237  
238  	/* Store everything in CMOS above 128 bytes. */
239  	for (i = 0; i < (TOTAL_CHANNELS * 8); ++i)
240  		cmos_write(bytes[i], CMOS_READ_TRAINING + i);
241  }
242  static void read_training_restore_results(void)
243  {
244  	u8 bytes[TOTAL_CHANNELS * 8];
245  	int ch, i;
246  
247  	/* Read from CMOS. */
248  	for (i = 0; i < (TOTAL_CHANNELS * 8); ++i)
249  		bytes[i] = cmos_read(CMOS_READ_TRAINING + i);
250  
251  	/* Program restored results. */
252  	FOR_EACH_CHANNEL(ch) {
253  		for (i = 0; i < 8; ++i) {
254  			const int t = bytes[(ch * 8) + i] >> 4;
255  			const int p = bytes[(ch * 8) + i] & 7;
256  			u32 bl_reg = mchbar_read32(CxRDTy_MCHBAR(ch, i));
257  			bl_reg &= ~(CxRDTy_T_MASK | CxRDTy_P_MASK);
258  			bl_reg |= (3 << 25) | CxRDTy_T(t) | CxRDTy_P(p);
259  			mchbar_write32(CxRDTy_MCHBAR(ch, i), bl_reg);
260  			printk(BIOS_DEBUG, "Restored timings for byte lane "
261  			       "%d on channel %d: %d.%d\n", i, ch, t, p);
262  		}
263  	}
264  }
265  void raminit_read_training(const dimminfo_t *const dimms, const int s3resume)
266  {
267  	if (!s3resume) {
268  		perform_read_training(dimms);
269  		read_training_store_results();
270  	} else {
271  		read_training_restore_results();
272  	}
273  	raminit_reset_readwrite_pointers();
274  }
275  
/* Write Training. */
/* Per-group write timing fields. The mask constants are unsigned and
 * the field macros cast their operand to u32: the T field occupies the
 * top bits [31:28], so forming it in (signed) int — `0xf << 28` or
 * `t << 28` for t >= 8 — would overflow int, which is undefined
 * behavior (C11 6.5.7p4). The produced bit patterns are unchanged. */
#define CxWRTy_T_SHIFT		28
#define CxWRTy_T_MASK		(0xfu << CxWRTy_T_SHIFT)
#define CxWRTy_T(t)		(((u32)(t) << CxWRTy_T_SHIFT) & CxWRTy_T_MASK)
#define CxWRTy_P_SHIFT		24
#define CxWRTy_P_MASK		(0x7u << CxWRTy_P_SHIFT)
#define CxWRTy_P(p)		(((u32)(p) << CxWRTy_P_SHIFT) & CxWRTy_P_MASK)
#define CxWRTy_F_SHIFT		18
#define CxWRTy_F_MASK		(0x3u << CxWRTy_F_SHIFT)
#define CxWRTy_F(f)		(((u32)(f) << CxWRTy_F_SHIFT) & CxWRTy_F_MASK)
#define CxWRTy_D_SHIFT		16
#define CxWRTy_D_MASK		(0x3u << CxWRTy_D_SHIFT)
#define CxWRTy_BELOW_D		(0x3u << CxWRTy_D_SHIFT)
#define CxWRTy_ABOVE_D		(0x1u << CxWRTy_D_SHIFT)
/* Test pattern for write training: one u32 per 8-byte step, written to
   both halves of the step (see write_training_test()); 80 entries cover
   the 640-byte test window. */
static const u32 write_training_schedule[] = {
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xffffffff, 0x00000000, 0xffffffff, 0x00000000,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0x10101010, 0xefefefef, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0xefefefef, 0xeeeeeeee, 0x11111111, 0x10101010,
	0x03030303, 0x04040404, 0x09090909, 0x10101010,
	0x21212121, 0x40404040, 0x81818181, 0x00000000,
	0x03030303, 0x04040404, 0x09090909, 0x10101010,
	0x21212121, 0x40404040, 0x81818181, 0x00000000,
	0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
	0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
	0xfdfdfdfd, 0xfafafafa, 0xf7f7f7f7, 0xeeeeeeee,
	0xdfdfdfdf, 0xbebebebe, 0x7f7f7f7f, 0xfefefefe,
};
/* Byte-lane test masks (lower/upper u32 of each 8-byte step) per group.
   For raw card types A, B and C: [MEM_CLOCK_1067MT?][group][lower/upper];
   all-zero pairs mark groups with no lanes to test. */
static const u32 write_training_bytelane_masks_abc[2][4][2] = {
	{ /* clock < MEM_CLOCK_1067MT */
		{ 0xffffffff, 0x00000000 }, { 0x00000000, 0x00000000 },
		{ 0x00000000, 0xffffffff }, { 0x00000000, 0x00000000 },
	},
	{ /* clock == MEM_CLOCK_1067MT */
		{ 0x0000ffff, 0x00000000 }, { 0xffff0000, 0x00000000 },
		{ 0x00000000, 0x0000ffff }, { 0x00000000, 0xffff0000 },
	},
};
/* For raw card type F: [group][lower/upper] (same for all clocks). */
static const u32 write_training_bytelane_masks_f[4][2] = {
	{ 0xff00ff00, 0x00000000 }, { 0x00ff00ff, 0x00000000 },
	{ 0x00000000, 0xff00ff00 }, { 0x00000000, 0x00ff00ff },
};
/* p is a 3-bit sub-phase: 8 p steps make up one t step. */
#define WRITE_TIMING_P_SHIFT	3
#define WRITE_TIMING_P_BOUND	(1 << WRITE_TIMING_P_SHIFT)
/* f selects one of four coarse ranges; t wraps at a clock-dependent
   bound (see write_training_per_group()). */
#define WRITE_TIMING_F_BOUND	4
/* Write timing triple; the effective delay is "f.t.p". */
typedef struct {
	int f;	/* range selector, 0..WRITE_TIMING_F_BOUND-1 when normalized */
	int t;	/* coarse step, 0..t_bound-1 when normalized */
	const int t_bound;	/* t wrap-around bound (11 or 12, by clock) */
	int p;	/* fine step (phase), 0..WRITE_TIMING_P_BOUND-1 when normalized */
} write_timing_t;
337  static void print_write_timing(const int msg_lvl, const char *const msg,
338  			       const int group, const int channel,
339  			       const write_timing_t *const timing)
340  {
341  	printk(msg_lvl, "%sgroup %d, ch %d: %d.%d.%d\n",
342  	       msg, group, channel, timing->f, timing->t, timing->p);
343  }
344  
345  static int normalize_write_timing(write_timing_t *const timing)
346  {
347  	while (timing->p >= WRITE_TIMING_P_BOUND) {
348  		timing->t++;
349  		timing->p -= WRITE_TIMING_P_BOUND;
350  	}
351  	while (timing->p < 0) {
352  		timing->t--;
353  		timing->p += WRITE_TIMING_P_BOUND;
354  	}
355  	while (timing->t >= timing->t_bound) {
356  		timing->f++;
357  		timing->t -= timing->t_bound;
358  	}
359  	while (timing->t < 0) {
360  		timing->f--;
361  		timing->t += timing->t_bound;
362  	}
363  	if (timing->f < 0) {
364  		printk(BIOS_WARNING,
365  		       "Timing underflow during write training.\n");
366  		timing->f = 0;
367  		timing->t = 0;
368  		timing->p = 0;
369  		return -1;
370  	} else if (timing->f >= WRITE_TIMING_F_BOUND) {
371  		printk(BIOS_WARNING,
372  		       "Timing overflow during write training.\n");
373  		timing->f = WRITE_TIMING_F_BOUND - 1;
374  		timing->t = timing->t_bound - 1;
375  		timing->p = WRITE_TIMING_P_BOUND - 1;
376  		return -1;
377  	}
378  	return 0;
379  }
/*
 * Normalize the timing and program it into the group's write timing
 * register, including the clock-dependent D field.
 * Returns -1 (register untouched) if the timing had to be clamped.
 */
static int program_write_timing(const int ch, const int group,
				write_timing_t *const timing, int memclk1067)
{
	/* t thresholds for the D field: MEM_CLOCK_1067MT? X lower/upper */
	const u32 d_bounds[2][2] = { { 1, 6 }, { 2, 9 } };

	if (normalize_write_timing(timing) < 0)
		return -1;

	const int f = timing->f;
	const int t = timing->t;
	/* NOTE(review): at 1067MT, p values in the window [9.4, 10.4) are
	   forced to p == 4 — presumably an invalid phase region of the
	   hardware; confirm against GM45 documentation. */
	const int p = (memclk1067 && (((t ==  9) && (timing->p >= 4)) ||
				       ((t == 10) && (timing->p < 4))))
		? 4 : timing->p;
	/* D field depends on where t lies relative to the bounds above:
	   3 below/at the lower bound, 1 above the upper bound, else 0. */
	const int d =
		(t <= d_bounds[memclk1067][0]) ? CxWRTy_BELOW_D :
		((t >  d_bounds[memclk1067][1]) ? CxWRTy_ABOVE_D : 0);

	u32 reg = mchbar_read32(CxWRTy_MCHBAR(ch, group));
	reg &= ~(CxWRTy_T_MASK | CxWRTy_P_MASK | CxWRTy_F_MASK);
	reg &= ~CxWRTy_D_MASK;
	reg |= CxWRTy_T(t) | CxWRTy_P(p) | CxWRTy_F(f) | d;
	mchbar_write32(CxWRTy_MCHBAR(ch, group), reg);

	return 0;
}
406  /* Returns 1 on success, 0 on failure. */
407  static int write_training_test(const address_bunch_t *const addresses,
408  			       const u32 *const masks)
409  {
410  	int i, ret = 0;
411  
412  	const u32 mmarb0 = mchbar_read32(0x0220);
413  	const u8  wrcctl = mchbar_read8(0x0218);
414  	mchbar_setbits32(0x0220, 0xf << 28);
415  	mchbar_setbits8(0x0218,  0x1 <<  4);
416  
417  	for (i = 0; i < addresses->count; ++i) {
418  		const unsigned int addr = addresses->addr[i];
419  		unsigned int off;
420  		for (off = 0; off < 640; off += 8) {
421  			const u32 pattern = write_training_schedule[off >> 3];
422  			write32p(addr + off, pattern);
423  			write32p(addr + off + 4, pattern);
424  		}
425  
426  		mchbar_setbits8(0x78, 1);
427  
428  		for (off = 0; off < 640; off += 8) {
429  			const u32 good = write_training_schedule[off >> 3];
430  			const u32 read1 = read32p(addr + off);
431  			if ((read1 & masks[0]) != (good & masks[0]))
432  				goto _bad_timing_out;
433  			const u32 read2 = read32p(addr + off + 4);
434  			if ((read2 & masks[1]) != (good & masks[1]))
435  				goto _bad_timing_out;
436  		}
437  	}
438  	ret = 1;
439  
440  _bad_timing_out:
441  	mchbar_write32(0x0220, mmarb0);
442  	mchbar_write8(0x0218, wrcctl);
443  
444  	return ret;
445  }
/*
 * Find the lowest working write timing, starting from the caller-supplied
 * value: first advance t until the pattern test passes, then step one t
 * back and advance p until it passes again.
 * Returns 0 on success, -1 on timing overflow.
 */
static int write_training_find_lower(const int ch, const int group,
				     const address_bunch_t *const addresses,
				     const u32 masks[][2], const int memclk1067,
				     write_timing_t *const lower)
{
	/* NOTE(review): return value ignored; on underflow the timing is
	   clamped to zero by normalize_write_timing() and the search just
	   proceeds from there. */
	program_write_timing(ch, group, lower, memclk1067);
	/* Coarse search for good t. */
	while (!write_training_test(addresses, masks[group])) {
		++lower->t;
		if (program_write_timing(ch, group, lower, memclk1067) < 0)
			return -1;
	}
	/* Step back, then fine search for good p. */
	if ((lower->f <= 0) && (lower->t <= 0))
		/* Can't step back, zero is good. */
		return 0;

	--lower->t;
	program_write_timing(ch, group, lower, memclk1067);
	while (!write_training_test(addresses, masks[group])) {
		++lower->p;
		if (program_write_timing(ch, group, lower, memclk1067) < 0)
			return -1;
	}

	return 0;
}
/*
 * Find the first failing write timing above the lower bound. The caller
 * passes lower + 3t as a starting point, which must still pass. Advance
 * t until the test fails, step one t back, then advance p until it fails
 * again; on return *upper is just past the last passing timing.
 * Returns 0 on success, -1 on test failure at the start or timing overflow.
 */
static int write_training_find_upper(const int ch, const int group,
				     const address_bunch_t *const addresses,
				     const u32 masks[][2], const int memclk1067,
				     write_timing_t *const upper)
{
	if (program_write_timing(ch, group, upper, memclk1067) < 0)
		return -1;
	if (!write_training_test(addresses, masks[group])) {
		printk(BIOS_WARNING,
		       "Write training failure; limits too narrow.\n");
		return -1;
	}
	/* Coarse search for bad t. */
	/* NOTE(review): unlike read_training_find_upper()'s do/while, the
	   first iteration re-tests the already verified start value —
	   redundant but harmless. */
	while (write_training_test(addresses, masks[group])) {
		++upper->t;
		if (program_write_timing(ch, group, upper, memclk1067) < 0)
			return -1;
	}
	/* Fine search for bad p. */
	--upper->t;
	program_write_timing(ch, group, upper, memclk1067);
	while (write_training_test(addresses, masks[group])) {
		++upper->p;
		if (program_write_timing(ch, group, upper, memclk1067) < 0)
			return -1;
	}

	return 0;
}
/*
 * Train one byte-lane group: find the lowest and the highest passing
 * write timings, then program the mean of the two. A failing lower-bound
 * search is fatal; a failing upper bound only warns (the clamped value
 * is still used for the mean).
 */
static void write_training_per_group(const int ch, const int group,
				     const address_bunch_t *const addresses,
				     const u32 masks[][2], const int memclk1067)
{
	const int t_bound = memclk1067 ? 12 : 11;
	write_timing_t lower = { 0, 0, t_bound, 0 },
		       upper = { 0, 0, t_bound, 0 };

	/*** Search lower bound. ***/

	/* Start at -1f from current values. */
	/* NOTE(review): the current values are read from lower bit
	   positions (12/8/2) than the ones program_write_timing() writes
	   (28/24/18) — presumably the register's applied-value fields;
	   confirm against GM45 documentation. */
	const u32 reg = mchbar_read32(CxWRTy_MCHBAR(ch, group));
	lower.t =  (reg >> 12) & 0xf;
	lower.p =  (reg >>  8) & 0x7;
	lower.f = ((reg >>  2) & 0x3) - 1;

	if (write_training_find_lower(ch, group, addresses,
				      masks, memclk1067, &lower) < 0)
		die("Write training failure: lower bound.\n");
	print_write_timing(RAM_DEBUG, "Lower bound for ", group, ch, &lower);

	/*** Search upper bound. ***/

	/* Start at lower + 3t. */
	upper.t = lower.t + 3;
	upper.p = lower.p;
	upper.f = lower.f;

	if (write_training_find_upper(ch, group, addresses,
				      masks, memclk1067, &upper) < 0)
		printk(BIOS_WARNING, "Write training failure: upper bound.\n");
	print_write_timing(RAM_DEBUG, "Upper bound for ", group, ch, &upper);

	/*** Calculate and program mean value. ***/

	/* Convert both bounds to plain p units (1f == t_bound t, 1t == 8p). */
	lower.t += lower.f * lower.t_bound;
	lower.p += lower.t << WRITE_TIMING_P_SHIFT;
	upper.t += upper.f * upper.t_bound;
	upper.p += upper.t << WRITE_TIMING_P_SHIFT;
	/* lower becomes the mean value. */
	const int mean_p = (lower.p + upper.p) >> 1;
	lower.f = mean_p / (lower.t_bound << WRITE_TIMING_P_SHIFT);
	lower.t = (mean_p >> WRITE_TIMING_P_SHIFT) % lower.t_bound;
	lower.p = mean_p & (WRITE_TIMING_P_BOUND - 1);
	program_write_timing(ch, group, &lower, memclk1067);
	/* The prefix is only printed at RAM_DEBUG level; the value itself
	   is always printed at BIOS_DEBUG. */
	printk(RAM_DEBUG, "Final timings for ");
	print_write_timing(BIOS_DEBUG, "", group, ch, &lower);
}
/*
 * Run write training on all populated channels. The byte-lane masks
 * depend on the raw card type: type F has its own table, types A/B/C
 * share a table indexed by memory clock.
 */
static void perform_write_training(const int memclk1067,
				   const dimminfo_t *const dimms)
{
	/* Per-channel flag: DIMM is raw card type F. */
	const int cardF[] = { dimms[0].card_type == 0xf,
			      dimms[1].card_type == 0xf };
	int ch, r, group;

	address_bunch_t addr[2] = { { { 0, }, 0 }, { { 0, }, 0 }, };
	/* Add check if channel A is populated, i.e. if cardF[0] is valid.
	 * Otherwise we would write channel A registers when DIMM in channel B
	 * is of raw card type A, B or C (cardF[1] == 0) even if channel A is
	 * not populated.
	 * Needs raw card type A, B or C for testing. */
	if ((dimms[0].card_type != 0) && (cardF[0] == cardF[1])) {
		/* Common path for both channels. */
		/* Same card type on both channels: collect all ranks into
		   one bunch, trained through the channel-A iteration. */
		FOR_EACH_POPULATED_RANK(dimms, ch, r)
			addr[0].addr[addr[0].count++] =
				raminit_get_rank_addr(ch, r);
	} else {
		/* Card types differ: keep the ranks per channel. */
		FOR_EACH_POPULATED_RANK(dimms, ch, r)
			addr[ch].addr[addr[ch].count++] =
				raminit_get_rank_addr(ch, r);
	}

	FOR_EACH_CHANNEL(ch) if (addr[ch].count > 0) {
		const u32 (*const masks)[2] = (!cardF[ch])
			? write_training_bytelane_masks_abc[memclk1067]
			: write_training_bytelane_masks_f;
		for (group = 0; group < 4; ++group) {
			/* Skip groups with no byte lanes to test. */
			if (!masks[group][0] && !masks[group][1])
				continue;
			write_training_per_group(
				ch, group, &addr[ch], masks, memclk1067);
		}
	}
}
586  static void write_training_store_results(void)
587  {
588  	u8 bytes[TOTAL_CHANNELS * 4 * 2]; /* two bytes per group */
589  	int ch, i;
590  
591  	/* Store one T/P pair in one, F in the other byte. */
592  	/* We could save six bytes by putting all F values in two bytes. */
593  	FOR_EACH_CHANNEL(ch) {
594  		for (i = 0; i < 4; ++i) {
595  			const u32 reg = mchbar_read32(CxWRTy_MCHBAR(ch, i));
596  			bytes[(ch * 8) + (i * 2)] =
597  				(((reg & CxWRTy_T_MASK)
598  				  >> CxWRTy_T_SHIFT) << 4) |
599  				((reg & CxWRTy_P_MASK) >> CxWRTy_P_SHIFT);
600  			bytes[(ch * 8) + (i * 2) + 1] =
601  				((reg & CxWRTy_F_MASK) >> CxWRTy_F_SHIFT);
602  		}
603  	}
604  
605  	/* Store everything in CMOS above 128 bytes. */
606  	for (i = 0; i < (TOTAL_CHANNELS * 4 * 2); ++i)
607  		cmos_write(bytes[i], CMOS_WRITE_TRAINING + i);
608  }
609  static void write_training_restore_results(const int memclk1067)
610  {
611  	const int t_bound = memclk1067 ? 12 : 11;
612  
613  	u8 bytes[TOTAL_CHANNELS * 4 * 2]; /* two bytes per group */
614  	int ch, i;
615  
616  	/* Read from CMOS. */
617  	for (i = 0; i < (TOTAL_CHANNELS * 4 * 2); ++i)
618  		bytes[i] = cmos_read(CMOS_WRITE_TRAINING + i);
619  
620  	/* Program with original program_write_timing(). */
621  	FOR_EACH_CHANNEL(ch) {
622  		for (i = 0; i < 4; ++i) {
623  			write_timing_t timing = { 0, 0, t_bound, 0 };
624  			timing.f = bytes[(ch * 8) + (i * 2) + 1] & 3;
625  			timing.t = bytes[(ch * 8) + (i * 2)] >> 4;
626  			timing.p = bytes[(ch * 8) + (i * 2)] & 7;
627  			program_write_timing(ch, i, &timing, memclk1067);
628  			printk(BIOS_DEBUG, "Restored timings for group %d "
629  					   "on channel %d: %d.%d.%d\n",
630  			       i, ch, timing.f, timing.t, timing.p);
631  		}
632  	}
633  }
634  void raminit_write_training(const mem_clock_t ddr3clock,
635  			    const dimminfo_t *const dimms,
636  			    const int s3resume)
637  {
638  	const int memclk1067 = ddr3clock == MEM_CLOCK_1067MT;
639  
640  	if (!s3resume) {
641  		perform_write_training(memclk1067, dimms);
642  		write_training_store_results();
643  	} else {
644  		write_training_restore_results(memclk1067);
645  	}
646  	raminit_reset_readwrite_pointers();
647  }