/* raminit_receive_enable_calibration.c */
/* SPDX-License-Identifier: GPL-2.0-only */

#include <stdint.h>
#include <device/mmio.h>
#include <console/console.h>
#include "gm45.h"

/*
 * Receive-enable calibration: per populated channel and per byte-lane
 * group, walk the receive-enable timing until the DQS strobe is sampled
 * at the desired point, then program the result into the CxDRT3/CxRECy
 * MCHBAR registers.
 *
 * CxRECy field layout, as defined by the shift/mask macros below:
 *   [31:28] T   (CxRECy_T)
 *   [26:24] P   (CxRECy_P)
 *   [23:22] PH  (CxRECy_PH)
 *   [21:20] PM  (CxRECy_PM)
 */
#define CxRECy_MCHBAR(x, y) (0x14a0 + ((x) * 0x0100) + ((3 - (y)) * 4))
#define CxRECy_SHIFT_L 0
#define CxRECy_MASK_L (3 << CxRECy_SHIFT_L)
#define CxRECy_SHIFT_H 16
#define CxRECy_MASK_H (3 << CxRECy_SHIFT_H)
#define CxRECy_T_SHIFT 28
#define CxRECy_T_MASK (0xf << CxRECy_T_SHIFT)
#define CxRECy_T(t) (((t) << CxRECy_T_SHIFT) & CxRECy_T_MASK)
#define CxRECy_P_SHIFT 24
#define CxRECy_P_MASK (0x7 << CxRECy_P_SHIFT)
#define CxRECy_P(p) (((p) << CxRECy_P_SHIFT) & CxRECy_P_MASK)
#define CxRECy_PH_SHIFT 22
#define CxRECy_PH_MASK (0x3 << CxRECy_PH_SHIFT)
#define CxRECy_PH(p) (((p) << CxRECy_PH_SHIFT) & CxRECy_PH_MASK)
#define CxRECy_PM_SHIFT 20
#define CxRECy_PM_MASK (0x3 << CxRECy_PM_SHIFT)
#define CxRECy_PM(p) (((p) << CxRECy_PM_SHIFT) & CxRECy_PM_MASK)
#define CxRECy_TIMING_MASK (CxRECy_T_MASK | CxRECy_P_MASK | \
			    CxRECy_PH_MASK | CxRECy_PM_MASK)

/* C field of CxDRT3 (one value shared by the whole channel). */
#define CxDRT3_C_SHIFT 7
#define CxDRT3_C_MASK (0xf << CxDRT3_C_SHIFT)
#define CxDRT3_C(c) (((c) << CxDRT3_C_SHIFT) & CxDRT3_C_MASK)
/* group to byte-lane mapping: (cardF X group X 2 per group) */
static const char bytelane_map[2][4][2] = {
	/* A,B,C */{ { 0, 1 }, { 2, 3 }, { 4, 5 }, { 6, 7 } },
	/* F */{ { 0, 2 }, { 1, 3 }, { 4, 6 }, { 5, 7 } },
};

/* Bounds for the timing fields handled by normalize_rec_timing(). */
#define PH_BOUND 4
#define PH_STEP 2
#define PM_BOUND 3
#define C_BOUND 16
/*
 * One receive-enable timing value, kept as separate fields of
 * decreasing granularity: p is the finest step, then t, then ph,
 * then c (coarsest).  normalize_rec_timing() carries overflow from
 * each field into the next coarser one.
 */
typedef struct {
	int c;			/* programmed per channel into CxDRT3's C field */
	int pre;		/* programmed into CxRECy's PM field */
	int ph;			/* programmed into CxRECy's PH field */
	int t;			/* programmed into CxRECy's T field */
	const int t_bound;	/* exclusive upper bound for t (clock/DDR-type dependent) */
	int p;			/* programmed into CxRECy's P field */
	const int p_bound;	/* exclusive upper bound for p (clock dependent) */
} rec_timing_t;
/*
 * Bring every field of `timing` back into its valid range by carrying
 * over-/underflow upwards: p wraps into t (at p_bound), t wraps into
 * ph (at t_bound, in PH_STEP increments), ph wraps into c (at
 * PH_BOUND).  Dies if c itself leaves [0, C_BOUND) — the calibration
 * walked off the programmable range.
 */
static void normalize_rec_timing(rec_timing_t *const timing)
{
	while (timing->p >= timing->p_bound) {
		timing->t++;
		timing->p -= timing->p_bound;
	}
	while (timing->p < 0) {
		timing->t--;
		timing->p += timing->p_bound;
	}
	while (timing->t >= timing->t_bound) {
		timing->ph += PH_STEP;
		timing->t -= timing->t_bound;
	}
	while (timing->t < 0) {
		timing->ph -= PH_STEP;
		timing->t += timing->t_bound;
	}
	while (timing->ph >= PH_BOUND) {
		timing->c++;
		timing->ph -= PH_BOUND;
	}
	while (timing->ph < 0) {
		timing->c--;
		timing->ph += PH_BOUND;
	}
	if (timing->c < 0 || timing->c >= C_BOUND)
		die("Timing under-/overflow during "
		    "receive-enable calibration.\n");
}

/* Step back one full clock (one C unit). */
static void rec_full_backstep(rec_timing_t *const timing)
{
	timing->c--;
}
/* Step back half a clock (PH_STEP out of PH_BOUND phases). */
static void rec_half_backstep(rec_timing_t *const timing)
{
	timing->ph -= PH_STEP;
}
/*
 * Step forward a quarter clock: half of t_bound in t, plus — when
 * t_bound is odd — half of p_bound in p to make up the remainder.
 */
static void rec_quarter_step(rec_timing_t *const timing)
{
	timing->t += (timing->t_bound) >> 1;
	timing->p += (timing->t_bound & 1) * (timing->p_bound >> 1);
}
/* Inverse of rec_quarter_step(). */
static void rec_quarter_backstep(rec_timing_t *const timing)
{
	timing->t -= (timing->t_bound) >> 1;
	timing->p -= (timing->t_bound & 1) * (timing->p_bound >> 1);
}
/* Step forward by the smallest unit (one p). */
static void rec_smallest_step(rec_timing_t *const timing)
{
	timing->p++;
}

/*
 * Normalize the given group's timing and write it to the hardware:
 * the C value into the per-channel CxDRT3 register, everything else
 * (T/P/PH and pre into PM) into the per-group CxRECy register.
 */
static void program_timing(int channel, int group,
			   rec_timing_t timings[][4])
{
	rec_timing_t *const timing = &timings[channel][group];

	normalize_rec_timing(timing);

	/* C value is per channel. */
	unsigned int mchbar = CxDRT3_MCHBAR(channel);
	mchbar_clrsetbits32(mchbar, CxDRT3_C_MASK, CxDRT3_C(timing->c));

	/* All other per group. */
	mchbar = CxRECy_MCHBAR(channel, group);
	u32 reg = mchbar_read32(mchbar);
	reg &= ~CxRECy_TIMING_MASK;
	reg |= CxRECy_T(timing->t) | CxRECy_P(timing->p) |
	       CxRECy_PH(timing->ph) | CxRECy_PM(timing->pre);
	mchbar_write32(mchbar, reg);
}

/*
 * Sample the DQS level on one byte lane: toggle bit 9 of the
 * per-channel 0x14f0 register (presumably rearms the sampler — TODO
 * confirm against chipset docs), trigger a dummy read from rank 0 of
 * this channel, then return bit 30 of the per-lane register at 0x14b0.
 * Callers treat a non-zero return as "DQS sampled high".
 */
static int read_dqs_level(const int channel, const int lane)
{
	unsigned int mchbar = 0x14f0 + (channel * 0x0100);
	mchbar_clrbits32(mchbar, 1 << 9);
	mchbar_setbits32(mchbar, 1 << 9);

	/* Read from this channel. */
	read32p(raminit_get_rank_addr(channel, 0));

	mchbar = 0x14b0 + (channel * 0x0100) + ((7 - lane) * 4);
	return mchbar_read32(mchbar) & (1 << 30);
}

/* Advance in quarter steps until BOTH lanes of the group sample low. */
static void find_dqs_low(const int channel, const int group,
			 rec_timing_t timings[][4], const char lane_map[][2])
{
	/* Look for DQS low, using quarter steps. */
	while (read_dqs_level(channel, lane_map[group][0]) ||
	       read_dqs_level(channel, lane_map[group][1])) {
		rec_quarter_step(&timings[channel][group]);
		program_timing(channel, group, timings);
	}
}
/* Advance in quarter steps until EITHER lane of the group samples high. */
static void find_dqs_high(const int channel, const int group,
			  rec_timing_t timings[][4], const char lane_map[][2])
{
	/* Look for _any_ DQS high, using quarter steps. */
	while (!read_dqs_level(channel, lane_map[group][0]) &&
	       !read_dqs_level(channel, lane_map[group][1])) {
		rec_quarter_step(&timings[channel][group]);
		program_timing(channel, group, timings);
	}
}
/*
 * Locate the low-to-high DQS edge precisely: skip two t units past the
 * previous transition, search coarsely (quarter steps) for any lane
 * high, then back up a quarter step and advance in smallest steps
 * until BOTH lanes sample high.
 */
static void find_dqs_edge_lowhigh(const int channel, const int group,
				  rec_timing_t timings[][4],
				  const char lane_map[][2])
{
	/* Advance beyond previous high to low transition. */
	timings[channel][group].t += 2;
	program_timing(channel, group, timings);

	/* Coarsely look for DQS high. */
	find_dqs_high(channel, group, timings, lane_map);

	/* Go back and perform finer search. */
	rec_quarter_backstep(&timings[channel][group]);
	program_timing(channel, group, timings);
	while (!read_dqs_level(channel, lane_map[group][0]) ||
	       !read_dqs_level(channel, lane_map[group][1])) {
		rec_smallest_step(&timings[channel][group]);
		program_timing(channel, group, timings);
	}
}
/* Step back whole clocks until both lanes sample low (the preamble). */
static void find_preamble(const int channel, const int group,
			  rec_timing_t timings[][4], const char lane_map[][2])
{
	/* Look for DQS low, backstepping. */
	while (read_dqs_level(channel, lane_map[group][0]) ||
	       read_dqs_level(channel, lane_map[group][1])) {
		rec_full_backstep(&timings[channel][group]);
		program_timing(channel, group, timings);
	}
}

/*
 * Core calibration loop.  Starting from CAS+1 clocks, for every group:
 * find a DQS-low starting point, find the low-to-high edge, step a
 * quarter clock past it, back up to the preamble and find the edge
 * again precisely, then back off half a clock.  Raw card type F gets
 * one extra t unit.  Finally, since C is shared per channel, the
 * smallest C wins and each group's surplus clocks are moved into its
 * preamble (PM) field.
 */
static void receive_enable_calibration(const int ddr_type,
				       const timings_t *const timings,
				       const dimminfo_t *const dimms)
{
	/* Override group to byte-lane mapping for raw card type F DIMMS. */
	static const char over_bytelane_map[2][4][2] = {
		/* A,B,C */{ { 0, 1 }, { 2, 3 }, { 4, 5 }, { 6, 7 } },
		/* F */{ { 0, 0 }, { 3, 3 }, { 6, 6 }, { 5, 5 } },
	};

	const int cardF[] = {
		dimms[0].card_type == 0xf,
		dimms[1].card_type == 0xf,
	};

	/* Field bounds depend on memory clock and DDR type. */
	const unsigned int t_bound =
		(timings->mem_clock == MEM_CLOCK_1067MT) ? 9
		: (ddr_type == DDR3) ? 12 : 15;
	const unsigned int p_bound =
		(timings->mem_clock == MEM_CLOCK_1067MT) ? 8 : 1;

	/* Start at CAS + 1 clocks, everything else zeroed. */
	rec_timing_t rec_timings[2][4] = {
		{
			{ timings->CAS + 1, 0, 0, 0, t_bound, 0, p_bound },
			{ timings->CAS + 1, 0, 0, 0, t_bound, 0, p_bound },
			{ timings->CAS + 1, 0, 0, 0, t_bound, 0, p_bound },
			{ timings->CAS + 1, 0, 0, 0, t_bound, 0, p_bound }
		}, {
			{ timings->CAS + 1, 0, 0, 0, t_bound, 0, p_bound },
			{ timings->CAS + 1, 0, 0, 0, t_bound, 0, p_bound },
			{ timings->CAS + 1, 0, 0, 0, t_bound, 0, p_bound },
			{ timings->CAS + 1, 0, 0, 0, t_bound, 0, p_bound }
		}
	};

	int ch, group;
	FOR_EACH_POPULATED_CHANNEL(dimms, ch) {
		const char (*const map)[2] = over_bytelane_map[cardF[ch]];
		for (group = 0; group < 4; ++group) {
			program_timing(ch, group, rec_timings);
			find_dqs_low(ch, group, rec_timings, map);

			find_dqs_edge_lowhigh(ch, group, rec_timings, map);

			rec_quarter_step(&rec_timings[ch][group]);
			program_timing(ch, group, rec_timings);
			find_preamble(ch, group, rec_timings, map);
			find_dqs_edge_lowhigh(ch, group, rec_timings, map);
			rec_half_backstep(&rec_timings[ch][group]);
			normalize_rec_timing(&rec_timings[ch][group]);
			if (cardF[ch]) {
				rec_timings[ch][group].t++;
				program_timing(ch, group, rec_timings);
			}
		}
		/* C is per channel: clamp all groups to the smallest C and
		   fold each group's surplus clocks into its PM field. */
		int c_min = C_BOUND;
		for (group = 0; group < 4; ++group) {
			if (rec_timings[ch][group].c < c_min)
				c_min = rec_timings[ch][group].c;
		}
		for (group = 0; group < 4; ++group) {
			rec_timings[ch][group].pre =
				rec_timings[ch][group].c - c_min;
			rec_timings[ch][group].c = c_min;
			program_timing(ch, group, rec_timings);
			/* NOTE(review): the prefix is logged at RAM_DEBUG but
			   the values at BIOS_DEBUG, so without RAM-setup
			   debugging only the second half of the line is
			   printed — confirm this split is intentional. */
			printk(RAM_DEBUG, "Final timings for ");
			printk(BIOS_DEBUG, "group %d, ch %d: %d.%d.%d.%d.%d\n",
			       group, ch,
			       rec_timings[ch][group].c,
			       rec_timings[ch][group].pre,
			       rec_timings[ch][group].ph,
			       rec_timings[ch][group].t,
			       rec_timings[ch][group].p);
		}
	}
}

/*
 * Entry point: program the group-to-byte-lane mapping into the CxRECy
 * registers (raw card type F uses a different mapping), put the
 * controller into calibration mode, run the calibration, and restore
 * normal operation.
 */
void raminit_receive_enable_calibration(const int ddr_type,
					const timings_t *const timings,
					const dimminfo_t *const dimms)
{
	int ch;

	/* Setup group to byte-lane mapping. */
	FOR_EACH_POPULATED_CHANNEL(dimms, ch) {
		const char (*const map)[2] =
			bytelane_map[dimms[ch].card_type == 0xf];
		unsigned int group;
		for (group = 0; group < 4; ++group) {
			const unsigned int mchbar = CxRECy_MCHBAR(ch, group);
			u32 reg = mchbar_read32(mchbar);
			/* Clear CxRECy_MASK_H, bit 8 and CxRECy_MASK_L, then
			   store each lane's offset from its group number. */
			reg &= ~((3 << 16) | (1 << 8) | 3);
			reg |= (map[group][0] - group);
			reg |= (map[group][1] - group - 1) << 16;
			mchbar_write32(mchbar, reg);
		}
	}

	/* Enter calibration mode — presumably bit 31 of 0x12a4/0x13a4
	   gates it per channel; TODO confirm register semantics. */
	mchbar_setbits32(0x12a4, 1 << 31);
	mchbar_setbits32(0x13a4, 1 << 31);
	mchbar_clrsetbits32(0x14f0, 3 << 9, 1 << 9);
	mchbar_clrsetbits32(0x15f0, 3 << 9, 1 << 9);

	receive_enable_calibration(ddr_type, timings, dimms);

	/* Leave calibration mode and reset the DQS read/write pointers. */
	mchbar_clrbits32(0x12a4, 1 << 31);
	mchbar_clrbits32(0x13a4, 1 << 31);
	raminit_reset_readwrite_pointers();
}