xref: /rk3399_ARM-atf/drivers/st/ddr/phy/phyinit/src/ddrphy_phyinit_progcsrskiptrain.c (revision 681296444e508e722565c6713effd2cf346a4dcf)
1 /*
2  * Copyright (C) 2021-2026, STMicroelectronics - All Rights Reserved
3  *
4  * SPDX-License-Identifier: BSD-3-Clause
5  */
6 
7 #include <arch_helpers.h>
8 
9 #include <common/debug.h>
10 
11 #include <ddrphy_phyinit.h>
12 #include <ddrphy_wrapper.h>
13 
14 #include <lib/mmio.h>
15 
16 #include <platform_def.h>
17 
/*
 * Timing adjustments folded into the skip-training delay computations in this
 * file. The fields are summed with picosecond constants and then multiplied by
 * 1000 to reach femtoseconds, so they are handled as picosecond quantities.
 */
struct phyinit_timings {
	int tstaoff;	/* Added to the DFIMRL, TxDqsDly and RxEnDly sums */
	int tpdm;	/* Added for DFIMRL and RxEnDly, subtracted for TxDqsDly */
	int tcasl_add;	/* Added to the DFIMRL, TxDqsDly and RxEnDly sums */
};

/*
 * File-local instance with static storage duration, hence zero-initialised.
 * NOTE(review): no code in the visible source assigns these fields, so every
 * term they contribute is currently 0 - confirm whether a setter is missing.
 */
static struct phyinit_timings timings;
25 
26 /*
27  * Program dfimrl according to this formula:
28  *
29  *         dfimrl = ceiling( (ARdPtrinitval*UI + phy_tx_insertion_dly +
30  *                            phy_rx_insertion_dly + PHY_Rx_Fifo_dly + tDQSCK + tstaoff) /
31  *                           dficlk_period)
32  *
33  * All terms in above equation specified in ps
34  * tDQSCK - determine from memory model
35  * tstaoff - determine from memory model
36  * phy_tx_insertion_dly = 200ps
37  * phy_rx_insertion_dly = 200ps
38  * phy_rx_fifo_dly      = 200ps + 4UI
39  */
dfimrl_program(struct stm32mp_ddr_config * config,struct pmu_smb_ddr_1d * mb_ddr_1d,int ardptrinitval)40 static void dfimrl_program(struct stm32mp_ddr_config *config, struct pmu_smb_ddr_1d *mb_ddr_1d,
41 			   int ardptrinitval)
42 {
43 	uint32_t byte;
44 	int dfimrl_in_dficlk;
45 	int phy_rx_fifo_dly;
46 	int phy_rx_insertion_dly = 200;
47 	int phy_tx_insertion_dly = 200;
48 	int tmp_value;
49 	long long dficlk_period_x1000;
50 	long long dfimrl_in_fs;
51 	long long uifs;
52 	uint16_t dfimrl;
53 
54 	uifs = (1000 * 1000000) / ((int)config->uib.frequency * 2);
55 	dficlk_period_x1000 = 4 * uifs;
56 
57 	phy_rx_fifo_dly = (int)(((200 * 1000) + (4 * uifs)) / 1000);
58 
59 	tmp_value = phy_tx_insertion_dly + phy_rx_insertion_dly + phy_rx_fifo_dly +
60 		    timings.tstaoff + timings.tcasl_add + timings.tpdm;
61 	dfimrl_in_fs = (ardptrinitval * uifs) + ((long long)tmp_value * 1000);
62 
63 	dfimrl_in_dficlk = (int)(dfimrl_in_fs / dficlk_period_x1000);
64 	if ((dfimrl_in_fs % dficlk_period_x1000) != 0) {
65 		dfimrl_in_dficlk++;
66 	}
67 	dfimrl = (uint16_t)(dfimrl_in_dficlk + mb_ddr_1d->dfimrlmargin);
68 
69 	/*
70 	 * mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (TDBYTE | CBRD | CSR_DFIMRL_ADDR))),
71 	 *               dfimrl);
72 	 */
73 	for (byte = 0U; byte < config->uib.numdbyte; byte++) {
74 		uint32_t c_addr;
75 
76 		c_addr = byte << 12;
77 		mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (TDBYTE | c_addr |
78 								CSR_DFIMRL_ADDR))),
79 			      dfimrl);
80 	}
81 
82 	mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (TMASTER | CSR_HWTMRL_ADDR))), dfimrl);
83 }
84 
85 /*
86  * Program txdqsdlytg0/1[9:0]:
87  *
88  *         txdqsdlytg*[9:6] = floor( (4*UI + tstaoff) / UI)
89  *         txdqsdlytg*[5:0] = ceiling( (tstaoff%UI / UI) * 32)
90  *
91  * tstaoff and UI expressed in ps
92  *
93  * For HMD and LPDDR4X and MEMCLK <= 533 mhz:
94  *    txdqsdlytg*[9:6] = 0x5
95  *
96  * For other dimm types, leave TDqsDlyTg*[9:0] at default 0x100
97  *
98  * ppp_0001_cccc_uuuu_1101_0000
99  *
100  * if DDR3 or DDR4
101  *      num_timingroup = numrank_dfi0;
102  * else
103  *      num_timingroup = numrank_dfi0 + numrank_dfi1 * dfi1exists;
104  */
txdqsdlytg_program(struct stm32mp_ddr_config * config,struct pmu_smb_ddr_1d * mb_ddr_1d,uint16_t * txdqsdly)105 static void txdqsdlytg_program(struct stm32mp_ddr_config *config, struct pmu_smb_ddr_1d *mb_ddr_1d,
106 			       uint16_t *txdqsdly)
107 {
108 	uint32_t byte;
109 	int txdqsdlytg_5to0; /* Fine delay - 1/32UI per increment */
110 	int txdqsdlytg_9to6; /* Coarse delay - 1UI per increment */
111 	int txdqsdlytg_fine_default = 0;
112 	int txdqsdlytg_coarse_default = 4;
113 	int timings_sum;
114 	long long tmp_value;
115 	long long uifs;
116 
117 	uifs = (1000 * 1000000) / ((int)config->uib.frequency * 2);
118 
119 	txdqsdlytg_9to6 = (int)(((int)((txdqsdlytg_coarse_default * uifs) / 1000) +
120 				 timings.tstaoff + timings.tcasl_add
121 				 - timings.tpdm) / (int)(uifs / 1000));
122 
123 	timings_sum = timings.tstaoff + timings.tcasl_add - timings.tpdm;
124 	tmp_value = fmodll(((txdqsdlytg_fine_default * uifs / 32) +
125 			    ((long long)timings_sum * 1000)),
126 			   uifs);
127 	txdqsdlytg_5to0 = (int)(tmp_value / uifs * 32);
128 	if ((tmp_value % uifs) != 0) {
129 		txdqsdlytg_5to0++;
130 	}
131 
132 	/* Bit-5 of LCDL is no longer used, so bumping bit-5 of fine_dly up to coarse_dly */
133 	if (txdqsdlytg_5to0 >= 32) {
134 		txdqsdlytg_9to6 = txdqsdlytg_9to6 + 1;
135 		txdqsdlytg_5to0 = txdqsdlytg_5to0 - 32;
136 	}
137 
138 	*txdqsdly = (uint16_t)((txdqsdlytg_9to6 << 6) | txdqsdlytg_5to0);
139 
140 	for (byte = 0U; byte < config->uib.numdbyte; byte++) {
141 		uint32_t c_addr;
142 		uint32_t nibble;
143 
144 		c_addr = byte << 12;
145 		for (nibble = 0U; nibble < 2U; nibble++) {
146 			uint32_t u_addr;
147 
148 			if (ddrphy_phyinit_isdbytedisabled(config, mb_ddr_1d, byte) != 0) {
149 				continue;
150 			}
151 
152 			u_addr = nibble << 8;
153 
154 #if STM32MP_DDR3_TYPE || STM32MP_DDR4_TYPE
155 			if ((mb_ddr_1d->cspresent & 0x1U) != 0U) {
156 #else /* STM32MP_LPDDR4_TYPE */
157 			if (((mb_ddr_1d->cspresentcha & 0x1U) |
158 			     (mb_ddr_1d->cspresentchb & 0x1U)) != 0U) {
159 #endif /* STM32MP_DDR3_TYPE || STM32MP_DDR4_TYPE */
160 				mmio_write_16((uintptr_t)
161 					      (DDRPHYC_BASE + (4U * (TDBYTE | c_addr | u_addr |
162 								     CSR_TXDQSDLYTG0_ADDR))),
163 					      *txdqsdly);
164 			}
165 
166 #if STM32MP_LPDDR4_TYPE
167 			if ((((mb_ddr_1d->cspresentcha & 0x2U) >> 1) |
168 			     ((mb_ddr_1d->cspresentchb & 0x2U) >> 1)) != 0U) {
169 				mmio_write_16((uintptr_t)
170 					      (DDRPHYC_BASE + (4U * (TDBYTE | c_addr | u_addr |
171 								     CSR_TXDQSDLYTG1_ADDR))),
172 					      *txdqsdly);
173 			}
174 #endif /* STM32MP_LPDDR4_TYPE */
175 		}
176 	}
177 }
178 
179 /*
180  * ##############################################################
181  *
182  * Program txdqdlyTg0/1[8:0]:
183  *
184  *     txdqdlyTg*[8:6] = floor( (txdqsdlytg*[5:0]*UI/32 + tDQS2DQ + 0.5UI) / UI)
185  *     txdqdlyTg*[5:0] = ceil( ((txdqsdlytg*[5:0]*UI/32 + tDQS2DQ + 0.5UI)%UI / UI) * 32)
186  *
187  * ##############################################################
188  */
189 static void txdqdlytg_program(struct stm32mp_ddr_config *config, struct pmu_smb_ddr_1d *mb_ddr_1d,
190 			      uint16_t txdqsdly)
191 {
192 	uint32_t byte;
193 	int txdqdly_5to0; /* Fine delay - 1/32UI per increment */
194 	int txdqdly_8to6; /* Coarse delay - 1UI per increment */
195 	int txdqsdlytg_5to0; /* Fine delay - 1/32UI per increment */
196 	long long tmp_value;
197 	long long uifs;
198 	uint16_t txdqdly;
199 
200 	uifs = (1000 * 1000000) / ((int)config->uib.frequency * 2);
201 
202 	txdqsdlytg_5to0 = (int)txdqsdly & 0x3F;
203 
204 	txdqdly_8to6 = (int)(((txdqsdlytg_5to0 * uifs / 32) + (uifs / 2)) / uifs);
205 	tmp_value = fmodll(((txdqsdlytg_5to0 * uifs / 32) + (uifs / 2)), uifs);
206 	txdqdly_5to0 = (int)(((tmp_value * 32) / uifs));
207 	if ((tmp_value % uifs) != 0) {
208 		txdqdly_5to0++;
209 	}
210 
211 	/* Bit-5 of LCDL is no longer used, so bumping bit-5 of fine_dly up to coarse_dly */
212 	if (txdqdly_5to0 >= 32) {
213 		txdqdly_8to6 = txdqdly_8to6 + 1;
214 		txdqdly_5to0 = txdqdly_5to0 - 32;
215 	}
216 
217 	txdqdly = (uint16_t)((txdqdly_8to6 << 6) | txdqdly_5to0);
218 
219 	for (byte = 0U; byte < config->uib.numdbyte; byte++) {
220 		uint32_t c_addr;
221 		uint32_t lane;
222 
223 		c_addr = byte << 12;
224 		for (lane = 0U; lane < 9U; lane++) {
225 			uint32_t r_addr;
226 
227 			if (ddrphy_phyinit_isdbytedisabled(config, mb_ddr_1d, byte) != 0) {
228 				continue;
229 			}
230 
231 			r_addr = lane << 8;
232 
233 #if STM32MP_DDR3_TYPE || STM32MP_DDR4_TYPE
234 			if ((mb_ddr_1d->cspresent & 0x1U) != 0U) {
235 #else /* STM32MP_LPDDR4_TYPE */
236 			if (((mb_ddr_1d->cspresentcha & 0x1U) |
237 			     (mb_ddr_1d->cspresentchb & 0x1U)) != 0U) {
238 #endif /* STM32MP_DDR3_TYPE || STM32MP_DDR4_TYPE */
239 				mmio_write_16((uintptr_t)
240 					      (DDRPHYC_BASE + (4U * (TDBYTE | c_addr | r_addr |
241 								     CSR_TXDQDLYTG0_ADDR))),
242 					      txdqdly);
243 			}
244 
245 #if STM32MP_LPDDR4_TYPE
246 			if ((((mb_ddr_1d->cspresentcha & 0x2U) >> 1) |
247 			     ((mb_ddr_1d->cspresentchb & 0x2U) >> 1)) != 0U) {
248 				mmio_write_16((uintptr_t)
249 					      (DDRPHYC_BASE + (4U * (TDBYTE | c_addr | r_addr |
250 								     CSR_TXDQDLYTG1_ADDR))),
251 					      txdqdly);
252 			}
253 #endif /* STM32MP_LPDDR4_TYPE */
254 		}
255 	}
256 }
257 
258 /*
259  * Program rxendly0/1[10:0]:
260  *
261  *         rxendly[10:6] = floor( (4*UI + tDQSCK + tstaoff) / UI)
262  *         rxendly[5:0]  = ceil( ((tDQSCK + tstaoff) % UI) * 32)
263  *
264  * tDQSCK, tstaoff and UI expressed in ps
265  */
266 static void rxendly_program(struct stm32mp_ddr_config *config, struct pmu_smb_ddr_1d *mb_ddr_1d)
267 {
268 	int rxendly_coarse_default = 4;
269 	int rxendly_fine_default = 0;
270 
271 	int backoff_x1000 __maybe_unused;
272 	int zerobackoff_x1000 __maybe_unused;
273 	uint32_t byte;
274 	int rxendly_10to6; /* Coarse delay - 1UI per increment */
275 	int rxendly_5to0; /* Fine delay - 1/32UI per increment */
276 	int tmp_value;
277 	int totfinestep;
278 	long long finestepfs; /* Fine steps in fs */
279 	long long rxendly_offset_x1000000 = 0; /* 0 Offset is 1UI before the first DQS. */
280 	long long totfs;
281 	long long uifs;
282 	uint16_t rxendly;
283 
284 	uifs = (1000 * 1000000) / ((int)config->uib.frequency * 2);
285 
286 #if STM32MP_LPDDR4_TYPE
287 	/* Compensate for pptenrxenbackoff */
288 	zerobackoff_x1000 = (1000 * 24) / 32;
289 	if (config->uia.lp4rxpreamblemode == 1U) {
290 		backoff_x1000 = 1000 - ((1000 * 2) / 32);
291 	} else {
292 		backoff_x1000 = (1000 * (int)config->uia.rxenbackoff) - ((1000 * 2) / 32);
293 	}
294 
295 	if (config->uia.disableretraining == 0U) {
296 		rxendly_offset_x1000000 = config->uib.frequency < 333U ?
297 					  backoff_x1000 * uifs : zerobackoff_x1000 * uifs;
298 	} else {
299 		rxendly_offset_x1000000 = zerobackoff_x1000 * uifs;
300 	}
301 #endif /* STM32MP_LPDDR4_TYPE */
302 
303 	finestepfs = uifs / 32;
304 	tmp_value = timings.tstaoff + timings.tcasl_add + timings.tpdm;
305 	totfs = ((32 * rxendly_coarse_default * finestepfs) +
306 		 (rxendly_fine_default * finestepfs) + ((long long)tmp_value * 1000) +
307 		 (rxendly_offset_x1000000 / 1000));
308 	totfinestep = totfs / finestepfs;
309 
310 	rxendly_10to6 = totfinestep / 32;
311 	rxendly_5to0  = fmodi(totfinestep, 32);
312 
313 	/* Bit-5 of LCDL is no longer used, so bumping bit-5 of fine_dly up to coarse_dly */
314 	if (rxendly_5to0 >= 32) {
315 		rxendly_10to6 = rxendly_10to6 + 1;
316 		rxendly_5to0 = rxendly_5to0 - 32;
317 	}
318 
319 	rxendly = (uint16_t)((rxendly_10to6 << 6) | rxendly_5to0);
320 
321 	for (byte = 0U; byte < config->uib.numdbyte; byte++) {
322 		uint32_t c_addr;
323 		uint32_t nibble;
324 
325 		c_addr = byte << 12;
326 		for (nibble = 0U; nibble < 2U; nibble++) {
327 			uint32_t u_addr;
328 
329 			if (ddrphy_phyinit_isdbytedisabled(config, mb_ddr_1d, byte) != 0) {
330 				continue;
331 			}
332 
333 			u_addr = nibble << 8;
334 
335 #if STM32MP_DDR3_TYPE || STM32MP_DDR4_TYPE
336 			if ((mb_ddr_1d->cspresent & 0x1U) != 0) {
337 #else /* STM32MP_LPDDR4_TYPE */
338 			if (((mb_ddr_1d->cspresentcha & 0x1U) |
339 			     (mb_ddr_1d->cspresentchb & 0x1U)) != 0U) {
340 #endif /* STM32MP_DDR3_TYPE || STM32MP_DDR4_TYPE */
341 				mmio_write_16((uintptr_t)
342 					      (DDRPHYC_BASE + (4U * (TDBYTE | c_addr | u_addr |
343 								     CSR_RXENDLYTG0_ADDR))),
344 					      rxendly);
345 			}
346 
347 #if STM32MP_LPDDR4_TYPE
348 			if ((((mb_ddr_1d->cspresentcha & 0x2U) >> 1) |
349 			     ((mb_ddr_1d->cspresentchb & 0x2U) >> 1)) != 0U) {
350 				mmio_write_16((uintptr_t)
351 					      (DDRPHYC_BASE + (4U * (TDBYTE | c_addr | u_addr |
352 								     CSR_RXENDLYTG1_ADDR))),
353 					      rxendly);
354 			}
355 #endif /* STM32MP_LPDDR4_TYPE */
356 		}
357 	}
358 }
359 
360 #if STM32MP_LPDDR4_TYPE
361 /*
362  * Programming Seq0BGPR1/2/3 for LPDDR4
363  */
364 static void seq0bgpr_program(struct stm32mp_ddr_config *config)
365 {
366 	uint32_t extradly = 3U;
367 	uint32_t rl = 0U; /* Computed read latency */
368 	uint32_t wl = 0U; /* Computed write latency */
369 	uint16_t mr_dbi_rd; /* Extracted field from MR */
370 	uint16_t mr_rl;
371 	uint16_t mr_wl;
372 	uint16_t mr_wls;
373 	uint16_t regdata;
374 
375 	mr_rl = (uint16_t)config->uia.lp4rl;	/* RL[2:0] */
376 	mr_wl = (uint16_t)config->uia.lp4wl;	/* WL[5:3] */
377 	mr_wls = (uint16_t)config->uia.lp4wls;	/* WLS */
378 	mr_dbi_rd = (uint16_t)config->uia.lp4dbird; /* DBI-RD */
379 
380 	switch ((mr_dbi_rd << 3) | mr_rl) {
381 		/* DBI-RD Disabled */
382 	case  0U:
383 		rl = 6U;
384 		break;
385 	case  1U:
386 		rl = 10U;
387 		break;
388 	case  2U:
389 		rl = 14U;
390 		break;
391 	case  3U:
392 		rl = 20U;
393 		break;
394 	case  4U:
395 		rl = 24U;
396 		break;
397 	case  5U:
398 		rl = 28U;
399 		break;
400 	case  6U:
401 		rl = 32U;
402 		break;
403 	case  7U:
404 		rl = 36U;
405 		break;
406 		/* DBI-RD Enabled */
407 	case  8U:
408 		rl = 6U;
409 		break;
410 	case  9U:
411 		rl = 12U;
412 		break;
413 	case 10U:
414 		rl = 16U;
415 		break;
416 	case 11U:
417 		rl = 22U;
418 		break;
419 	case 12U:
420 		rl = 28U;
421 		break;
422 	case 13U:
423 		rl = 32U;
424 		break;
425 	case 14U:
426 		rl = 36U;
427 		break;
428 	case 15U:
429 		rl = 40U;
430 		break;
431 	default:
432 		rl = 6U;
433 		break;
434 	}
435 
436 	switch ((mr_wls << 3) | mr_wl) {
437 		/* DBI-RD Disabled */
438 	case  0U:
439 		wl = 4U;
440 		break;
441 	case  1U:
442 		wl = 6U;
443 		break;
444 	case  2U:
445 		wl = 8U;
446 		break;
447 	case  3U:
448 		wl = 10U;
449 		break;
450 	case  4U:
451 		wl = 12U;
452 		break;
453 	case  5U:
454 		wl = 14U;
455 		break;
456 	case  6U:
457 		wl = 16U;
458 		break;
459 	case  7U:
460 		wl = 18U;
461 		break;
462 		/* DBI-RD Enabled */
463 	case  8U:
464 		wl = 4U;
465 		break;
466 	case  9U:
467 		wl = 8U;
468 		break;
469 	case 10U:
470 		wl = 12U;
471 		break;
472 	case 11U:
473 		wl = 18U;
474 		break;
475 	case 12U:
476 		wl = 22U;
477 		break;
478 	case 13U:
479 		wl = 26U;
480 		break;
481 	case 14U:
482 		wl = 30U;
483 		break;
484 	case 15U:
485 		wl = 34U;
486 		break;
487 	default:
488 		wl = 4U;
489 		break;
490 	}
491 
492 	/* Program Seq0b_GPRx */
493 	regdata = (uint16_t)((rl - 5U + extradly) << CSR_ACSMRCASLAT_LSB);
494 	mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (P0 | C0 | TINITENG | R2 |
495 							CSR_SEQ0BGPR1_ADDR))),
496 		      regdata);
497 
498 	regdata = (uint16_t)(((wl + extradly) - 5U) << CSR_ACSMWCASLAT_LSB);
499 	mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (P0 | C0 | TINITENG | R2 |
500 							CSR_SEQ0BGPR2_ADDR))),
501 		      regdata);
502 
503 	regdata = (uint16_t)((rl - 5U + extradly + 4U + 8U) << CSR_ACSMRCASLAT_LSB);
504 	mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (P0 | C0 | TINITENG | R2 |
505 							CSR_SEQ0BGPR3_ADDR))),
506 		      regdata);
507 }
508 
509 /*
510  * Program hwtlpcsena and hwtlpcsenb based on number of ranks per channel
511  * Applicable only for LPDDR4.  These CSRs have no effect for DDR3/4.
512  *
513  * CSRs to program:
514  *      hwtlpcsena
515  *      hwtlpcsenb
516  *
517  * User input dependencies:
518  *      config->uib.numrank_dfi0
519  *      config->uib.numrank_dfi1
520  *      config->uib.dfi1exists
521  *      config->uib.numactivedbytedfi1
522  */
523 static void hwtlpcsen_program(struct stm32mp_ddr_config *config)
524 {
525 	uint16_t hwtlpcsena;
526 	uint16_t hwtlpcsenb;
527 
528 	/* Channel A - 1'b01 if signal-rank, 2'b11 if dual-rank */
529 	hwtlpcsena = (uint16_t)config->uib.numrank_dfi0 | 0x1U;
530 	mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (TMASTER | CSR_HWTLPCSENA_ADDR))),
531 		      hwtlpcsena);
532 
533 	/*
534 	 * Channel B - 1'b01 if signal-rank, 2'b11 if dual-rank
535 	 * If DFI1 exists but disabled, numrank_dfi0 is used to program CsEnB
536 	 */
537 	if ((config->uib.dfi1exists != 0U) && (config->uib.numactivedbytedfi1 == 0U)) {
538 		hwtlpcsenb = (uint16_t)config->uib.numrank_dfi0 | 0x1U;
539 		mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (TMASTER | CSR_HWTLPCSENB_ADDR))),
540 			      hwtlpcsenb);
541 	} else if ((config->uib.dfi1exists != 0U) && (config->uib.numactivedbytedfi1 > 0U)) {
542 		hwtlpcsenb = (uint16_t)config->uib.numrank_dfi1 | 0x1U;
543 		mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (TMASTER | CSR_HWTLPCSENB_ADDR))),
544 			      hwtlpcsenb);
545 	} else {
546 		/* Disable Channel B */
547 		hwtlpcsenb = 0x0U;
548 		mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (TMASTER | CSR_HWTLPCSENB_ADDR))),
549 			      hwtlpcsenb);
550 	}
551 }
552 
553 /*
554  * Program pptdqscntinvtrntg0 and pptdqscntinvtrntg1
555  * Calculated based on tDQS2DQ and Frequencey
556  * Applicable to LPDDR4 only
557  *
558  * 65536*(tdqs2dq_value_rank<rank>_chan<chan>*2)/(2*2048*UI(ps)_int)
559  *
560  * CSRs to program:
561  *      pptdqscntinvtrntg0
562  *      pptdqscntinvtrntg1
563  *
564  * User input dependencies:
565  *      config->uib.numrank_dfi0
566  *      config->uib.numrank_dfi1
567  *      config->uib.dfi1exists
568  *      config->uib.numdbyte
569  */
570 static void pptdqscntinvtrntg_program(struct stm32mp_ddr_config *config)
571 {
572 	uint32_t numrank_total = config->uib.numrank_dfi0;
573 	uint32_t rank;
574 
575 	/* Calculate total number of timing groups (ranks) */
576 	if (config->uib.dfi1exists != 0U) {
577 		numrank_total += config->uib.numrank_dfi1;
578 	}
579 
580 	/* Set per timing group */
581 	for (rank = 0U; rank < numrank_total; rank++) {
582 		uint32_t byte;
583 
584 		for (byte = 0U; byte < config->uib.numdbyte; byte++) {
585 			uint32_t c_addr;
586 
587 			c_addr = byte << 12;
588 			if (rank == 0U) {
589 				mmio_write_16((uintptr_t)
590 					      (DDRPHYC_BASE + (4U * (TDBYTE | c_addr |
591 							       CSR_PPTDQSCNTINVTRNTG0_ADDR))),
592 					      0U);
593 			} else if (rank == 1U) {
594 				mmio_write_16((uintptr_t)
595 					      (DDRPHYC_BASE + (4U * (TDBYTE | c_addr |
596 							       CSR_PPTDQSCNTINVTRNTG1_ADDR))),
597 					      0U);
598 			}
599 		}
600 	}
601 }
602 
603 /*
604  * CSRs to program:
605  *      PptCtlStatic:: DOCByteSelTg0/1
606  *                   :: pptenrxenbackoff
607  *
608  * User input dependencies::
609  *      config->uib.numdbyte
610  *      config->uib.numrank_dfi0
611  *      config->uib.numrank_dfi1
612  *      config->uia.lp4rxpreamblemode
613  *      config->uia.rxenbackoff
614  *      config->uia.drambyteswap
615  */
616 static void pptctlstatic_program(struct stm32mp_ddr_config *config)
617 {
618 	uint32_t byte;
619 	uint32_t pptenrxenbackoff;
620 
621 	/*
622 	 * The customer will setup some fields in this csr so the fw needs to do a
623 	 * read-modify-write here.
624 	 */
625 
626 	if (config->uia.lp4rxpreamblemode == 1U) {
627 		/* Rx-preamble mode for PS0 */
628 		/* Programming PptCtlStatic detected toggling preamble */
629 		pptenrxenbackoff = 0x1U; /* Toggling RD_PRE */
630 	} else {
631 		pptenrxenbackoff = config->uia.rxenbackoff; /* Static RD_PRE */
632 	}
633 
634 	for (byte = 0U; byte < config->uib.numdbyte; byte++) {
635 		uint32_t c_addr;
636 		uint16_t regdata;
637 		uint8_t pptentg1;
638 		uint32_t docbytetg0;
639 		uint32_t docbytetg1;
640 
641 		/* Each Dbyte could have a different configuration */
642 		c_addr = byte * C1;
643 		if ((byte % 2) == 0) {
644 			docbytetg0 = 0x1U & (config->uia.drambyteswap >> byte);
645 			docbytetg1 = 0x1U & (config->uia.drambyteswap >> byte);
646 		} else {
647 			docbytetg0 = 0x1U & (~(config->uia.drambyteswap >> byte));
648 			docbytetg1 = 0x1U & (~(config->uia.drambyteswap >> byte));
649 		}
650 
651 		pptentg1 = ((config->uib.numrank_dfi0 == 2U) || (config->uib.numrank_dfi1 == 2U)) ?
652 			   0x1U : 0x0U;
653 		regdata = (uint16_t)((0x1U << CSR_PPTENDQS2DQTG0_LSB) |
654 				     (pptentg1 << CSR_PPTENDQS2DQTG1_LSB) |
655 				     (0x1U << CSR_PPTENRXENDLYTG0_LSB) |
656 				     (pptentg1 << CSR_PPTENRXENDLYTG1_LSB) |
657 				     (pptenrxenbackoff << CSR_PPTENRXENBACKOFF_LSB) |
658 				     (docbytetg0 << CSR_DOCBYTESELTG0_LSB) |
659 				     (docbytetg1 << CSR_DOCBYTESELTG1_LSB));
660 
661 		mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (c_addr | TDBYTE |
662 								CSR_PPTCTLSTATIC_ADDR))),
663 			      regdata);
664 	}
665 }
666 #endif /* STM32MP_LPDDR4_TYPE */
667 
668 /*
669  * Program hwtcamode based on dram type
670  *
671  * CSRs to program:
672  *      hwtcamode::hwtlp3camode
673  *               ::hwtd4camode
674  *               ::hwtlp4camode
675  *               ::hwtd4altcamode
676  *               ::hwtcsinvert
677  *               ::hwtdbiinvert
678  */
679 static void hwtcamode_program(void)
680 {
681 	uint32_t hwtlp3camode = 0U;
682 	uint32_t hwtd4camode = 0U;
683 	uint32_t hwtlp4camode = 0U;
684 	uint32_t hwtd4altcamode = 0U;
685 	uint32_t hwtcsinvert = 0U;
686 	uint32_t hwtdbiinvert = 0U;
687 	uint16_t hwtcamode;
688 
689 #if STM32MP_DDR4_TYPE
690 	hwtd4camode = 1U;
691 #elif STM32MP_LPDDR4_TYPE
692 	hwtlp4camode = 1U;
693 	hwtcsinvert = 1U;
694 	hwtdbiinvert = 1U;
695 #else /* STM32MP_DDR3_TYPE */
696 	/* Nothing to declare */
697 #endif /* STM32MP_DDR4_TYPE */
698 
699 	hwtcamode = (uint16_t)((hwtdbiinvert << CSR_HWTDBIINVERT_LSB) |
700 			       (hwtcsinvert << CSR_HWTCSINVERT_LSB) |
701 			       (hwtd4altcamode << CSR_HWTD4ALTCAMODE_LSB) |
702 			       (hwtlp4camode << CSR_HWTLP4CAMODE_LSB) |
703 			       (hwtd4camode << CSR_HWTD4CAMODE_LSB) |
704 			       (hwtlp3camode << CSR_HWTLP3CAMODE_LSB));
705 
706 	mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (TMASTER | CSR_HWTCAMODE_ADDR))), hwtcamode);
707 }
708 
709 /*
710  * Program DllGainCtl and DllLockParam based on frequency
711  */
712 static void dllgainctl_dlllockparam_program(struct stm32mp_ddr_config *config)
713 {
714 	uint32_t dllgainiv;
715 	uint32_t dllgaintv;
716 	uint32_t lcdlseed;
717 	uint32_t memck_freq;
718 	uint32_t stepsize_x10 = 47U;	/*
719 					 * Nominal stepsize, in units of tenths of a ps,
720 					 * if nominal=4.7ps use 47
721 					 */
722 	uint16_t wddllgainctl;
723 	uint16_t wddlllockparam;
724 
725 	memck_freq = config->uib.frequency;
726 
727 	/*
728 	 * lcdlseed = ((1000000/memck_freq)/2)/lcdl_stepsize  ...
729 	 * where default lcdl_stepsize=4.7 in simulation.
730 	 */
731 	if (memck_freq >= 1200U) {
732 		dllgainiv = 0x04U;
733 		dllgaintv = 0x05U;
734 	} else if (memck_freq >= 800U) {
735 		dllgainiv = 0x03U;
736 		dllgaintv = 0x05U;
737 	} else if (memck_freq >= 532U) {
738 		dllgainiv = 0x02U;
739 		dllgaintv = 0x04U;
740 	} else if (memck_freq >= 332U) {
741 		dllgainiv = 0x01U;
742 		dllgaintv = 0x03U;
743 	} else {
744 		dllgainiv = 0x00U;
745 		dllgaintv = 0x02U;
746 	}
747 
748 	/*
749 	 * lcdlseed= (1000000/(2*memck_freq)) * (100/(120*(stepsize_nominal)));
750 	 * *100/105 is to bias the seed low.
751 	 */
752 	lcdlseed = (1000000U * 10U * 100U) / (2U * memck_freq * stepsize_x10 * 105U);
753 
754 	if (lcdlseed > (511U - 32U)) {
755 		lcdlseed = 511U - 32U;
756 	}
757 
758 	if (lcdlseed < 32U) {
759 		lcdlseed = 32U;
760 	}
761 
762 	wddllgainctl = (uint16_t)((CSR_DLLGAINTV_MASK & (dllgaintv << CSR_DLLGAINTV_LSB)) |
763 				  (CSR_DLLGAINIV_MASK & (dllgainiv << CSR_DLLGAINIV_LSB)));
764 
765 	mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (TMASTER | CSR_DLLGAINCTL_ADDR))),
766 		      wddllgainctl);
767 
768 	wddlllockparam = (uint16_t)((CSR_LCDLSEED0_MASK & (lcdlseed << CSR_LCDLSEED0_LSB)) |
769 				    (CSR_DISDLLGAINIVSEED_MASK & 0xFFFFU));
770 
771 	mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (TMASTER | CSR_DLLLOCKPARAM_ADDR))),
772 		      wddlllockparam);
773 }
774 
775 /*
776  * Program AcsmCtrl23 for Fw and Ppt.
777  *
778  * CSRs to program:
779  *   AcsmCtrl23::AcsmCsMask
780  *               AcsmCsMode
781  */
782 static void acsmctrl23_program(void)
783 {
784 	uint16_t regdata;
785 
786 	regdata = (0x0FU << CSR_ACSMCSMASK_LSB) | (0x1U << CSR_ACSMCSMODE_LSB);
787 	mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (C0 | TACSM | CSR_ACSMCTRL23_ADDR))),
788 		      regdata);
789 }
790 
791 /*
792  * Set PllForceCal to 1 and PllDacValIn to some arbitrary value
793  */
794 static void pllforcecal_plldacvalin_program(void)
795 {
796 	uint32_t dacval_in = 0x10U;
797 	uint32_t force_cal = 0x1U;
798 	uint32_t pllencal = 0x1U;
799 	uint32_t maxrange = 0x1FU;
800 	uint16_t pllctrl3_gpr;
801 	uint16_t pllctrl3_startup;
802 
803 	pllctrl3_startup = (uint16_t)((dacval_in << CSR_PLLDACVALIN_LSB) |
804 				      (maxrange << CSR_PLLMAXRANGE_LSB));
805 	pllctrl3_gpr = pllctrl3_startup | (uint16_t)((force_cal << CSR_PLLFORCECAL_LSB) |
806 						     (pllencal << CSR_PLLENCAL_LSB));
807 
808 	mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (TMASTER | CSR_PLLCTRL3_ADDR))),
809 		      pllctrl3_startup);
810 	mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (TINITENG | CSR_SEQ0BGPR6_ADDR))),
811 		      pllctrl3_gpr);
812 }
813 
814 /*
815  * This function programs registers that are normally set by training
816  * firmware.
817  *
818  * This function is used in place of running 1D or 1D training steps. PhyInit
819  * calls this function when skip_train = true. In that case, PhyInit does not
820  * execute training firmware and this function is called instead to program
821  * PHY registers according to DRAM timing parameters specified in userInput
822  * data structure. See documentation of ddrphy_phyinit_struct.h file
823  * details of timing parameters available in skip training.
824  *
825  * \warning ddrphy_phyinit_progcsrskiptrain() only supports zero board
826  * delay model. If system board delays are set or randomized, full 1D or 1D
827  * initialization flow must be executed.
828  *
829  * This function replaces these steps in the PHY Initialization sequence:
830  *  - (E) Set the PHY input clocks to the desired frequency
831  *  - (F) Write the Message Block parameters for the training firmware
832  *  - (G) Execute the Training Firmware
833  *  - (H) Read the Message Block results
834  *
835  * \returns \c void
836  */
837 void ddrphy_phyinit_progcsrskiptrain(struct stm32mp_ddr_config *config,
838 				     struct pmu_smb_ddr_1d *mb_ddr_1d, uint32_t ardptrinitval)
839 {
840 	uint16_t txdqsdly;
841 
842 	/*
843 	 * Program ATxDlY
844 	 * For DDR4, DDR3 and LPDDR4, leave AtxDly[6:0] at default (0x0)
845 	 */
846 
847 	dfimrl_program(config, mb_ddr_1d, ardptrinitval);
848 
849 	txdqsdlytg_program(config, mb_ddr_1d, &txdqsdly);
850 
851 	txdqdlytg_program(config, mb_ddr_1d, txdqsdly);
852 
853 	rxendly_program(config, mb_ddr_1d);
854 
855 #if STM32MP_LPDDR4_TYPE
856 	seq0bgpr_program(config);
857 
858 	hwtlpcsen_program(config);
859 
860 	pptdqscntinvtrntg_program(config);
861 
862 	pptctlstatic_program(config);
863 #endif /* STM32MP_LPDDR4_TYPE */
864 
865 	hwtcamode_program();
866 
867 	dllgainctl_dlllockparam_program(config);
868 
869 	acsmctrl23_program();
870 
871 	pllforcecal_plldacvalin_program();
872 
873 	/*
874 	 * ##############################################################
875 	 *
876 	 * Setting PhyInLP3 to 0 to cause PIE to execute LP2 sequence instead of INIT on first
877 	 * dfi_init_start.
878 	 * This prevents any DRAM commands before DRAM is initialized, which is the case for
879 	 * skip_train.
880 	 *
881 	 * Moved to here from dddrphy_phyinit_I_loadPIEImage()
882 	 * These should not be needed on S3-exit
883 	 *
884 	 * Note this executes for SkipTrain only, *not* DevInit+SkipTrain
885 	 * DevInit+SkipTrain already initializes DRAM and thus don't need to avoid DRAM commands
886 	 *
887 	 * ##############################################################
888 	 */
889 
890 	/*
891 	 * Special skipTraining configuration to Prevent DRAM Commands on the first dfi
892 	 * status interface handshake. In order to see this behavior, the first dfi_freq
893 	 * should be in the range of 0x0f < dfi_freq_sel[4:0] < 0x14.
894 	 */
895 	mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (TINITENG | CSR_PHYINLP3_ADDR))), 0x0U);
896 }
897