xref: /rk3399_ARM-atf/drivers/st/ddr/phy/phyinit/src/ddrphy_phyinit_progcsrskiptrain.c (revision f1f04d786795a02f81abeef7d1513e93098591ed)
1 /*
2  * Copyright (C) 2021-2024, STMicroelectronics - All Rights Reserved
3  *
4  * SPDX-License-Identifier: BSD-3-Clause
5  */
6 
7 #include <arch_helpers.h>
8 
9 #include <common/debug.h>
10 
11 #include <ddrphy_phyinit.h>
12 #include <ddrphy_wrapper.h>
13 
14 #include <lib/mmio.h>
15 
16 #include <platform_def.h>
17 
/*
 * Timing offsets shared by the delay computations of this file. The values are
 * handled as picoseconds (they are scaled by 1000 before being mixed with
 * femtosecond quantities). The object has static storage duration, so every
 * field starts at zero.
 */
static struct phyinit_timings {
	int tstaoff;	/* Added to the DFIMRL, TxDqsDly and RxEnDly sums */
	int tpdm;	/* Added to the DFIMRL/RxEnDly sums, subtracted for TxDqsDly */
	int tcasl_add;	/* Added to the DFIMRL, TxDqsDly and RxEnDly sums */
} timings;
25 
26 /*
27  * Program dfimrl according to this formula:
28  *
29  *         dfimrl = ceiling( (ARdPtrinitval*UI + phy_tx_insertion_dly +
30  *                            phy_rx_insertion_dly + PHY_Rx_Fifo_dly + tDQSCK + tstaoff) /
31  *                           dficlk_period)
32  *
33  * All terms in above equation specified in ps
34  * tDQSCK - determine from memory model
35  * tstaoff - determine from memory model
36  * phy_tx_insertion_dly = 200ps
37  * phy_rx_insertion_dly = 200ps
38  * phy_rx_fifo_dly      = 200ps + 4UI
39  */
40 static void dfimrl_program(struct stm32mp_ddr_config *config, struct pmu_smb_ddr_1d *mb_ddr_1d,
41 			   int ardptrinitval)
42 {
43 	uint32_t byte;
44 	int dfimrl_in_dficlk;
45 	int phy_rx_fifo_dly;
46 	int phy_rx_insertion_dly = 200;
47 	int phy_tx_insertion_dly = 200;
48 	long long dficlk_period_x1000;
49 	long long dfimrl_in_fs;
50 	long long uifs;
51 	uint16_t dfimrl;
52 
53 	uifs = (1000 * 1000000) / ((int)config->uib.frequency * 2);
54 	dficlk_period_x1000 = 4 * uifs;
55 
56 	phy_rx_fifo_dly = (int)(((200 * 1000) + (4 * uifs)) / 1000);
57 
58 	dfimrl_in_fs = (ardptrinitval * uifs) +
59 		       ((phy_tx_insertion_dly + phy_rx_insertion_dly + phy_rx_fifo_dly +
60 			 timings.tstaoff + timings.tcasl_add + timings.tpdm) * 1000);
61 
62 	dfimrl_in_dficlk = (int)(dfimrl_in_fs / dficlk_period_x1000);
63 	if ((dfimrl_in_fs % dficlk_period_x1000) != 0) {
64 		dfimrl_in_dficlk++;
65 	}
66 	dfimrl = (uint16_t)(dfimrl_in_dficlk + mb_ddr_1d->dfimrlmargin);
67 
68 	/*
69 	 * mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (TDBYTE | CBRD | CSR_DFIMRL_ADDR))),
70 	 *               dfimrl);
71 	 */
72 	for (byte = 0U; byte < config->uib.numdbyte; byte++) {
73 		uint32_t c_addr;
74 
75 		c_addr = byte << 12;
76 		mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (TDBYTE | c_addr |
77 								CSR_DFIMRL_ADDR))),
78 			      dfimrl);
79 	}
80 
81 	mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (TMASTER | CSR_HWTMRL_ADDR))), dfimrl);
82 }
83 
84 /*
85  * Program txdqsdlytg0/1[9:0]:
86  *
87  *         txdqsdlytg*[9:6] = floor( (4*UI + tstaoff) / UI)
88  *         txdqsdlytg*[5:0] = ceiling( (tstaoff%UI / UI) * 32)
89  *
90  * tstaoff and UI expressed in ps
91  *
92  * For HMD and LPDDR4X and MEMCLK <= 533 mhz:
93  *    txdqsdlytg*[9:6] = 0x5
94  *
95  * For other dimm types, leave TDqsDlyTg*[9:0] at default 0x100
96  *
97  * ppp_0001_cccc_uuuu_1101_0000
98  *
99  * if DDR3 or DDR4
100  *      num_timingroup = numrank_dfi0;
101  * else
102  *      num_timingroup = numrank_dfi0 + numrank_dfi1 * dfi1exists;
103  */
104 static void txdqsdlytg_program(struct stm32mp_ddr_config *config, struct pmu_smb_ddr_1d *mb_ddr_1d,
105 			       uint16_t *txdqsdly)
106 {
107 	uint32_t byte;
108 	int txdqsdlytg_5to0; /* Fine delay - 1/32UI per increment */
109 	int txdqsdlytg_9to6; /* Coarse delay - 1UI per increment */
110 	int txdqsdlytg_fine_default = 0;
111 	int txdqsdlytg_coarse_default = 4;
112 	long long tmp_value;
113 	long long uifs;
114 
115 	uifs = (1000 * 1000000) / ((int)config->uib.frequency * 2);
116 
117 	txdqsdlytg_9to6 = (int)(((int)((txdqsdlytg_coarse_default * uifs) / 1000) +
118 				 timings.tstaoff + timings.tcasl_add
119 				 - timings.tpdm) / (int)(uifs / 1000));
120 
121 	tmp_value = fmodll(((txdqsdlytg_fine_default * uifs / 32) +
122 			    ((timings.tstaoff + timings.tcasl_add) * 1000) -
123 			    (timings.tpdm * 1000)),
124 			   uifs);
125 	txdqsdlytg_5to0 = (int)(tmp_value / uifs * 32);
126 	if ((tmp_value % uifs) != 0) {
127 		txdqsdlytg_5to0++;
128 	}
129 
130 	/* Bit-5 of LCDL is no longer used, so bumping bit-5 of fine_dly up to coarse_dly */
131 	if (txdqsdlytg_5to0 >= 32) {
132 		txdqsdlytg_9to6 = txdqsdlytg_9to6 + 1;
133 		txdqsdlytg_5to0 = txdqsdlytg_5to0 - 32;
134 	}
135 
136 	*txdqsdly = (uint16_t)((txdqsdlytg_9to6 << 6) | txdqsdlytg_5to0);
137 
138 	for (byte = 0U; byte < config->uib.numdbyte; byte++) {
139 		uint32_t c_addr;
140 		uint32_t nibble;
141 
142 		c_addr = byte << 12;
143 		for (nibble = 0U; nibble < 2U; nibble++) {
144 			uint32_t u_addr;
145 
146 			if (ddrphy_phyinit_isdbytedisabled(config, mb_ddr_1d, byte) != 0) {
147 				continue;
148 			}
149 
150 			u_addr = nibble << 8;
151 
152 #if STM32MP_DDR3_TYPE || STM32MP_DDR4_TYPE
153 			if ((mb_ddr_1d->cspresent & 0x1U) != 0U) {
154 #else /* STM32MP_LPDDR4_TYPE */
155 			if (((mb_ddr_1d->cspresentcha & 0x1U) |
156 			     (mb_ddr_1d->cspresentchb & 0x1U)) != 0U) {
157 #endif /* STM32MP_DDR3_TYPE || STM32MP_DDR4_TYPE */
158 				mmio_write_16((uintptr_t)
159 					      (DDRPHYC_BASE + (4U * (TDBYTE | c_addr | u_addr |
160 								     CSR_TXDQSDLYTG0_ADDR))),
161 					      *txdqsdly);
162 			}
163 
164 #if STM32MP_LPDDR4_TYPE
165 			if ((((mb_ddr_1d->cspresentcha & 0x2U) >> 1) |
166 			     ((mb_ddr_1d->cspresentchb & 0x2U) >> 1)) != 0U) {
167 				mmio_write_16((uintptr_t)
168 					      (DDRPHYC_BASE + (4U * (TDBYTE | c_addr | u_addr |
169 								     CSR_TXDQSDLYTG1_ADDR))),
170 					      *txdqsdly);
171 			}
172 #endif /* STM32MP_LPDDR4_TYPE */
173 		}
174 	}
175 }
176 
177 /*
178  * ##############################################################
179  *
180  * Program txdqdlyTg0/1[8:0]:
181  *
182  *     txdqdlyTg*[8:6] = floor( (txdqsdlytg*[5:0]*UI/32 + tDQS2DQ + 0.5UI) / UI)
183  *     txdqdlyTg*[5:0] = ceil( ((txdqsdlytg*[5:0]*UI/32 + tDQS2DQ + 0.5UI)%UI / UI) * 32)
184  *
185  * ##############################################################
186  */
187 static void txdqdlytg_program(struct stm32mp_ddr_config *config, struct pmu_smb_ddr_1d *mb_ddr_1d,
188 			      uint16_t txdqsdly)
189 {
190 	uint32_t byte;
191 	int txdqdly_5to0; /* Fine delay - 1/32UI per increment */
192 	int txdqdly_8to6; /* Coarse delay - 1UI per increment */
193 	int txdqsdlytg_5to0; /* Fine delay - 1/32UI per increment */
194 	long long tmp_value;
195 	long long uifs;
196 	uint16_t txdqdly;
197 
198 	uifs = (1000 * 1000000) / ((int)config->uib.frequency * 2);
199 
200 	txdqsdlytg_5to0 = (int)txdqsdly & 0x3F;
201 
202 	txdqdly_8to6 = (int)(((txdqsdlytg_5to0 * uifs / 32) + (uifs / 2)) / uifs);
203 	tmp_value = fmodll(((txdqsdlytg_5to0 * uifs / 32) + (uifs / 2)), uifs);
204 	txdqdly_5to0 = (int)(((tmp_value * 32) / uifs));
205 	if ((tmp_value % uifs) != 0) {
206 		txdqdly_5to0++;
207 	}
208 
209 	/* Bit-5 of LCDL is no longer used, so bumping bit-5 of fine_dly up to coarse_dly */
210 	if (txdqdly_5to0 >= 32) {
211 		txdqdly_8to6 = txdqdly_8to6 + 1;
212 		txdqdly_5to0 = txdqdly_5to0 - 32;
213 	}
214 
215 	txdqdly = (uint16_t)((txdqdly_8to6 << 6) | txdqdly_5to0);
216 
217 	for (byte = 0U; byte < config->uib.numdbyte; byte++) {
218 		uint32_t c_addr;
219 		uint32_t lane;
220 
221 		c_addr = byte << 12;
222 		for (lane = 0U; lane < 9U; lane++) {
223 			uint32_t r_addr;
224 
225 			if (ddrphy_phyinit_isdbytedisabled(config, mb_ddr_1d, byte) != 0) {
226 				continue;
227 			}
228 
229 			r_addr = lane << 8;
230 
231 #if STM32MP_DDR3_TYPE || STM32MP_DDR4_TYPE
232 			if ((mb_ddr_1d->cspresent & 0x1U) != 0U) {
233 #else /* STM32MP_LPDDR4_TYPE */
234 			if (((mb_ddr_1d->cspresentcha & 0x1U) |
235 			     (mb_ddr_1d->cspresentchb & 0x1U)) != 0U) {
236 #endif /* STM32MP_DDR3_TYPE || STM32MP_DDR4_TYPE */
237 				mmio_write_16((uintptr_t)
238 					      (DDRPHYC_BASE + (4U * (TDBYTE | c_addr | r_addr |
239 								     CSR_TXDQDLYTG0_ADDR))),
240 					      txdqdly);
241 			}
242 
243 #if STM32MP_LPDDR4_TYPE
244 			if ((((mb_ddr_1d->cspresentcha & 0x2U) >> 1) |
245 			     ((mb_ddr_1d->cspresentchb & 0x2U) >> 1)) != 0U) {
246 				mmio_write_16((uintptr_t)
247 					      (DDRPHYC_BASE + (4U * (TDBYTE | c_addr | r_addr |
248 								     CSR_TXDQDLYTG1_ADDR))),
249 					      txdqdly);
250 			}
251 #endif /* STM32MP_LPDDR4_TYPE */
252 		}
253 	}
254 }
255 
256 /*
257  * Program rxendly0/1[10:0]:
258  *
259  *         rxendly[10:6] = floor( (4*UI + tDQSCK + tstaoff) / UI)
260  *         rxendly[5:0]  = ceil( ((tDQSCK + tstaoff) % UI) * 32)
261  *
262  * tDQSCK, tstaoff and UI expressed in ps
263  */
264 static void rxendly_program(struct stm32mp_ddr_config *config, struct pmu_smb_ddr_1d *mb_ddr_1d)
265 {
266 	int rxendly_coarse_default = 4;
267 	int rxendly_fine_default = 0;
268 
269 	int backoff_x1000 __maybe_unused;
270 	int zerobackoff_x1000 __maybe_unused;
271 	uint32_t byte;
272 	int rxendly_10to6; /* Coarse delay - 1UI per increment */
273 	int rxendly_5to0; /* Fine delay - 1/32UI per increment */
274 	int totfinestep;
275 	long long finestepfs; /* Fine steps in fs */
276 	long long rxendly_offset_x1000000 = 0; /* 0 Offset is 1UI before the first DQS. */
277 	long long totfs;
278 	long long uifs;
279 	uint16_t rxendly;
280 
281 	uifs = (1000 * 1000000) / ((int)config->uib.frequency * 2);
282 
283 #if STM32MP_LPDDR4_TYPE
284 	/* Compensate for pptenrxenbackoff */
285 	zerobackoff_x1000 = (1000 * 24) / 32;
286 	if (config->uia.lp4rxpreamblemode == 1U) {
287 		backoff_x1000 = 1000 - ((1000 * 2) / 32);
288 	} else {
289 		backoff_x1000 = (1000 * (int)config->uia.rxenbackoff) - ((1000 * 2) / 32);
290 	}
291 
292 	if (config->uia.disableretraining == 0U) {
293 		rxendly_offset_x1000000 = config->uib.frequency < 333U ?
294 					  backoff_x1000 * uifs : zerobackoff_x1000 * uifs;
295 	} else {
296 		rxendly_offset_x1000000 = zerobackoff_x1000 * uifs;
297 	}
298 #endif /* STM32MP_LPDDR4_TYPE */
299 
300 	finestepfs = uifs / 32;
301 	totfs = ((32 * rxendly_coarse_default * finestepfs) +
302 		 (rxendly_fine_default * finestepfs) +
303 		 ((timings.tstaoff + timings.tcasl_add +
304 		   timings.tpdm) * 1000) + (rxendly_offset_x1000000 / 1000));
305 	totfinestep = totfs / finestepfs;
306 
307 	rxendly_10to6 = totfinestep / 32;
308 	rxendly_5to0  = fmodi(totfinestep, 32);
309 
310 	/* Bit-5 of LCDL is no longer used, so bumping bit-5 of fine_dly up to coarse_dly */
311 	if (rxendly_5to0 >= 32) {
312 		rxendly_10to6 = rxendly_10to6 + 1;
313 		rxendly_5to0 = rxendly_5to0 - 32;
314 	}
315 
316 	rxendly = (uint16_t)((rxendly_10to6 << 6) | rxendly_5to0);
317 
318 	for (byte = 0U; byte < config->uib.numdbyte; byte++) {
319 		uint32_t c_addr;
320 		uint32_t nibble;
321 
322 		c_addr = byte << 12;
323 		for (nibble = 0U; nibble < 2U; nibble++) {
324 			uint32_t u_addr;
325 
326 			if (ddrphy_phyinit_isdbytedisabled(config, mb_ddr_1d, byte) != 0) {
327 				continue;
328 			}
329 
330 			u_addr = nibble << 8;
331 
332 #if STM32MP_DDR3_TYPE || STM32MP_DDR4_TYPE
333 			if ((mb_ddr_1d->cspresent & 0x1U) != 0) {
334 #else /* STM32MP_LPDDR4_TYPE */
335 			if (((mb_ddr_1d->cspresentcha & 0x1U) |
336 			     (mb_ddr_1d->cspresentchb & 0x1U)) != 0U) {
337 #endif /* STM32MP_DDR3_TYPE || STM32MP_DDR4_TYPE */
338 				mmio_write_16((uintptr_t)
339 					      (DDRPHYC_BASE + (4U * (TDBYTE | c_addr | u_addr |
340 								     CSR_RXENDLYTG0_ADDR))),
341 					      rxendly);
342 			}
343 
344 #if STM32MP_LPDDR4_TYPE
345 			if ((((mb_ddr_1d->cspresentcha & 0x2U) >> 1) |
346 			     ((mb_ddr_1d->cspresentchb & 0x2U) >> 1)) != 0U) {
347 				mmio_write_16((uintptr_t)
348 					      (DDRPHYC_BASE + (4U * (TDBYTE | c_addr | u_addr |
349 								     CSR_RXENDLYTG1_ADDR))),
350 					      rxendly);
351 			}
352 #endif /* STM32MP_LPDDR4_TYPE */
353 		}
354 	}
355 }
356 
357 #if STM32MP_LPDDR4_TYPE
358 /*
359  * Programming Seq0BGPR1/2/3 for LPDDR4
360  */
361 static void seq0bgpr_program(struct stm32mp_ddr_config *config)
362 {
363 	uint32_t extradly = 3U;
364 	uint32_t rl = 0U; /* Computed read latency */
365 	uint32_t wl = 0U; /* Computed write latency */
366 	uint16_t mr_dbi_rd; /* Extracted field from MR */
367 	uint16_t mr_rl;
368 	uint16_t mr_wl;
369 	uint16_t mr_wls;
370 	uint16_t regdata;
371 
372 	mr_rl = (uint16_t)config->uia.lp4rl;	/* RL[2:0] */
373 	mr_wl = (uint16_t)config->uia.lp4wl;	/* WL[5:3] */
374 	mr_wls = (uint16_t)config->uia.lp4wls;	/* WLS */
375 	mr_dbi_rd = (uint16_t)config->uia.lp4dbird; /* DBI-RD */
376 
377 	switch ((mr_dbi_rd << 3) | mr_rl) {
378 		/* DBI-RD Disabled */
379 	case  0U:
380 		rl = 6U;
381 		break;
382 	case  1U:
383 		rl = 10U;
384 		break;
385 	case  2U:
386 		rl = 14U;
387 		break;
388 	case  3U:
389 		rl = 20U;
390 		break;
391 	case  4U:
392 		rl = 24U;
393 		break;
394 	case  5U:
395 		rl = 28U;
396 		break;
397 	case  6U:
398 		rl = 32U;
399 		break;
400 	case  7U:
401 		rl = 36U;
402 		break;
403 		/* DBI-RD Enabled */
404 	case  8U:
405 		rl = 6U;
406 		break;
407 	case  9U:
408 		rl = 12U;
409 		break;
410 	case 10U:
411 		rl = 16U;
412 		break;
413 	case 11U:
414 		rl = 22U;
415 		break;
416 	case 12U:
417 		rl = 28U;
418 		break;
419 	case 13U:
420 		rl = 32U;
421 		break;
422 	case 14U:
423 		rl = 36U;
424 		break;
425 	case 15U:
426 		rl = 40U;
427 		break;
428 	default:
429 		rl = 6U;
430 		break;
431 	}
432 
433 	switch ((mr_wls << 3) | mr_wl) {
434 		/* DBI-RD Disabled */
435 	case  0U:
436 		wl = 4U;
437 		break;
438 	case  1U:
439 		wl = 6U;
440 		break;
441 	case  2U:
442 		wl = 8U;
443 		break;
444 	case  3U:
445 		wl = 10U;
446 		break;
447 	case  4U:
448 		wl = 12U;
449 		break;
450 	case  5U:
451 		wl = 14U;
452 		break;
453 	case  6U:
454 		wl = 16U;
455 		break;
456 	case  7U:
457 		wl = 18U;
458 		break;
459 		/* DBI-RD Enabled */
460 	case  8U:
461 		wl = 4U;
462 		break;
463 	case  9U:
464 		wl = 8U;
465 		break;
466 	case 10U:
467 		wl = 12U;
468 		break;
469 	case 11U:
470 		wl = 18U;
471 		break;
472 	case 12U:
473 		wl = 22U;
474 		break;
475 	case 13U:
476 		wl = 26U;
477 		break;
478 	case 14U:
479 		wl = 30U;
480 		break;
481 	case 15U:
482 		wl = 34U;
483 		break;
484 	default:
485 		wl = 4U;
486 		break;
487 	}
488 
489 	/* Program Seq0b_GPRx */
490 	regdata = (uint16_t)((rl - 5U + extradly) << CSR_ACSMRCASLAT_LSB);
491 	mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (P0 | C0 | TINITENG | R2 |
492 							CSR_SEQ0BGPR1_ADDR))),
493 		      regdata);
494 
495 	regdata = (uint16_t)((wl - 5U + extradly) << CSR_ACSMWCASLAT_LSB);
496 	mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (P0 | C0 | TINITENG | R2 |
497 							CSR_SEQ0BGPR2_ADDR))),
498 		      regdata);
499 
500 	regdata = (uint16_t)((rl - 5U + extradly + 4U + 8U) << CSR_ACSMRCASLAT_LSB);
501 	mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (P0 | C0 | TINITENG | R2 |
502 							CSR_SEQ0BGPR3_ADDR))),
503 		      regdata);
504 }
505 
506 /*
507  * Program hwtlpcsena and hwtlpcsenb based on number of ranks per channel
508  * Applicable only for LPDDR4.  These CSRs have no effect for DDR3/4.
509  *
510  * CSRs to program:
511  *      hwtlpcsena
512  *      hwtlpcsenb
513  *
514  * User input dependencies:
515  *      config->uib.numrank_dfi0
516  *      config->uib.numrank_dfi1
517  *      config->uib.dfi1exists
518  *      config->uib.numactivedbytedfi1
519  */
520 static void hwtlpcsen_program(struct stm32mp_ddr_config *config)
521 {
522 	uint16_t hwtlpcsena;
523 	uint16_t hwtlpcsenb;
524 
525 	/* Channel A - 1'b01 if signal-rank, 2'b11 if dual-rank */
526 	hwtlpcsena = (uint16_t)config->uib.numrank_dfi0 | 0x1U;
527 	mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (TMASTER | CSR_HWTLPCSENA_ADDR))),
528 		      hwtlpcsena);
529 
530 	/*
531 	 * Channel B - 1'b01 if signal-rank, 2'b11 if dual-rank
532 	 * If DFI1 exists but disabled, numrank_dfi0 is used to program CsEnB
533 	 */
534 	if ((config->uib.dfi1exists != 0U) && (config->uib.numactivedbytedfi1 == 0U)) {
535 		hwtlpcsenb = (uint16_t)config->uib.numrank_dfi0 | 0x1U;
536 		mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (TMASTER | CSR_HWTLPCSENB_ADDR))),
537 			      hwtlpcsenb);
538 	} else if ((config->uib.dfi1exists != 0U) && (config->uib.numactivedbytedfi1 > 0U)) {
539 		hwtlpcsenb = (uint16_t)config->uib.numrank_dfi1 | 0x1U;
540 		mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (TMASTER | CSR_HWTLPCSENB_ADDR))),
541 			      hwtlpcsenb);
542 	} else {
543 		/* Disable Channel B */
544 		hwtlpcsenb = 0x0U;
545 		mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (TMASTER | CSR_HWTLPCSENB_ADDR))),
546 			      hwtlpcsenb);
547 	}
548 }
549 
550 /*
551  * Program pptdqscntinvtrntg0 and pptdqscntinvtrntg1
552  * Calculated based on tDQS2DQ and Frequencey
553  * Applicable to LPDDR4 only
554  *
555  * 65536*(tdqs2dq_value_rank<rank>_chan<chan>*2)/(2*2048*UI(ps)_int)
556  *
557  * CSRs to program:
558  *      pptdqscntinvtrntg0
559  *      pptdqscntinvtrntg1
560  *
561  * User input dependencies:
562  *      config->uib.numrank_dfi0
563  *      config->uib.numrank_dfi1
564  *      config->uib.dfi1exists
565  *      config->uib.numdbyte
566  */
567 static void pptdqscntinvtrntg_program(struct stm32mp_ddr_config *config)
568 {
569 	uint32_t numrank_total = config->uib.numrank_dfi0;
570 	uint32_t rank;
571 
572 	/* Calculate total number of timing groups (ranks) */
573 	if (config->uib.dfi1exists != 0U) {
574 		numrank_total += config->uib.numrank_dfi1;
575 	}
576 
577 	/* Set per timing group */
578 	for (rank = 0U; rank < numrank_total; rank++) {
579 		uint32_t byte;
580 
581 		for (byte = 0U; byte < config->uib.numdbyte; byte++) {
582 			uint32_t c_addr;
583 
584 			c_addr = byte << 12;
585 			if (rank == 0U) {
586 				mmio_write_16((uintptr_t)
587 					      (DDRPHYC_BASE + (4U * (TDBYTE | c_addr |
588 							       CSR_PPTDQSCNTINVTRNTG0_ADDR))),
589 					      0U);
590 			} else if (rank == 1U) {
591 				mmio_write_16((uintptr_t)
592 					      (DDRPHYC_BASE + (4U * (TDBYTE | c_addr |
593 							       CSR_PPTDQSCNTINVTRNTG1_ADDR))),
594 					      0U);
595 			}
596 		}
597 	}
598 }
599 
600 /*
601  * CSRs to program:
602  *      PptCtlStatic:: DOCByteSelTg0/1
603  *                   :: pptenrxenbackoff
604  *
605  * User input dependencies::
606  *      config->uib.numdbyte
607  *      config->uib.numrank_dfi0
608  *      config->uib.numrank_dfi1
609  *      config->uia.lp4rxpreamblemode
610  *      config->uia.rxenbackoff
611  *      config->uia.drambyteswap
612  */
613 static void pptctlstatic_program(struct stm32mp_ddr_config *config)
614 {
615 	uint32_t byte;
616 	uint32_t pptenrxenbackoff;
617 
618 	/*
619 	 * The customer will setup some fields in this csr so the fw needs to do a
620 	 * read-modify-write here.
621 	 */
622 
623 	if (config->uia.lp4rxpreamblemode == 1U) {
624 		/* Rx-preamble mode for PS0 */
625 		/* Programming PptCtlStatic detected toggling preamble */
626 		pptenrxenbackoff = 0x1U; /* Toggling RD_PRE */
627 	} else {
628 		pptenrxenbackoff = config->uia.rxenbackoff; /* Static RD_PRE */
629 	}
630 
631 	for (byte = 0U; byte < config->uib.numdbyte; byte++) {
632 		uint32_t c_addr;
633 		uint16_t regdata;
634 		uint8_t pptentg1;
635 		uint32_t docbytetg0;
636 		uint32_t docbytetg1;
637 
638 		/* Each Dbyte could have a different configuration */
639 		c_addr = byte * C1;
640 		if ((byte % 2) == 0) {
641 			docbytetg0 = 0x1U & (config->uia.drambyteswap >> byte);
642 			docbytetg1 = 0x1U & (config->uia.drambyteswap >> byte);
643 		} else {
644 			docbytetg0 = 0x1U & (~(config->uia.drambyteswap >> byte));
645 			docbytetg1 = 0x1U & (~(config->uia.drambyteswap >> byte));
646 		}
647 
648 		pptentg1 = ((config->uib.numrank_dfi0 == 2U) || (config->uib.numrank_dfi1 == 2U)) ?
649 			   0x1U : 0x0U;
650 		regdata = (uint16_t)((0x1U << CSR_PPTENDQS2DQTG0_LSB) |
651 				     (pptentg1 << CSR_PPTENDQS2DQTG1_LSB) |
652 				     (0x1U << CSR_PPTENRXENDLYTG0_LSB) |
653 				     (pptentg1 << CSR_PPTENRXENDLYTG1_LSB) |
654 				     (pptenrxenbackoff << CSR_PPTENRXENBACKOFF_LSB) |
655 				     (docbytetg0 << CSR_DOCBYTESELTG0_LSB) |
656 				     (docbytetg1 << CSR_DOCBYTESELTG1_LSB));
657 
658 		mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (c_addr | TDBYTE |
659 								CSR_PPTCTLSTATIC_ADDR))),
660 			      regdata);
661 	}
662 }
663 #endif /* STM32MP_LPDDR4_TYPE */
664 
665 /*
666  * Program hwtcamode based on dram type
667  *
668  * CSRs to program:
669  *      hwtcamode::hwtlp3camode
670  *               ::hwtd4camode
671  *               ::hwtlp4camode
672  *               ::hwtd4altcamode
673  *               ::hwtcsinvert
674  *               ::hwtdbiinvert
675  */
676 static void hwtcamode_program(void)
677 {
678 	uint32_t hwtlp3camode = 0U;
679 	uint32_t hwtd4camode = 0U;
680 	uint32_t hwtlp4camode = 0U;
681 	uint32_t hwtd4altcamode = 0U;
682 	uint32_t hwtcsinvert = 0U;
683 	uint32_t hwtdbiinvert = 0U;
684 	uint16_t hwtcamode;
685 
686 #if STM32MP_DDR4_TYPE
687 	hwtd4camode = 1U;
688 #elif STM32MP_LPDDR4_TYPE
689 	hwtlp4camode = 1U;
690 	hwtcsinvert = 1U;
691 	hwtdbiinvert = 1U;
692 #else /* STM32MP_DDR3_TYPE */
693 	/* Nothing to declare */
694 #endif /* STM32MP_DDR4_TYPE */
695 
696 	hwtcamode = (uint16_t)((hwtdbiinvert << CSR_HWTDBIINVERT_LSB) |
697 			       (hwtcsinvert << CSR_HWTCSINVERT_LSB) |
698 			       (hwtd4altcamode << CSR_HWTD4ALTCAMODE_LSB) |
699 			       (hwtlp4camode << CSR_HWTLP4CAMODE_LSB) |
700 			       (hwtd4camode << CSR_HWTD4CAMODE_LSB) |
701 			       (hwtlp3camode << CSR_HWTLP3CAMODE_LSB));
702 
703 	mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (TMASTER | CSR_HWTCAMODE_ADDR))), hwtcamode);
704 }
705 
706 /*
707  * Program DllGainCtl and DllLockParam based on frequency
708  */
709 static void dllgainctl_dlllockparam_program(struct stm32mp_ddr_config *config)
710 {
711 	uint32_t dllgainiv;
712 	uint32_t dllgaintv;
713 	uint32_t lcdlseed;
714 	uint32_t memck_freq;
715 	uint32_t stepsize_x10 = 47U;	/*
716 					 * Nominal stepsize, in units of tenths of a ps,
717 					 * if nominal=4.7ps use 47
718 					 */
719 	uint16_t wddllgainctl;
720 	uint16_t wddlllockparam;
721 
722 	memck_freq = config->uib.frequency;
723 
724 	/*
725 	 * lcdlseed = ((1000000/memck_freq)/2)/lcdl_stepsize  ...
726 	 * where default lcdl_stepsize=4.7 in simulation.
727 	 */
728 	if (memck_freq >= 1200U) {
729 		dllgainiv = 0x04U;
730 		dllgaintv = 0x05U;
731 	} else if (memck_freq >= 800U) {
732 		dllgainiv = 0x03U;
733 		dllgaintv = 0x05U;
734 	} else if (memck_freq >= 532U) {
735 		dllgainiv = 0x02U;
736 		dllgaintv = 0x04U;
737 	} else if (memck_freq >= 332U) {
738 		dllgainiv = 0x01U;
739 		dllgaintv = 0x03U;
740 	} else {
741 		dllgainiv = 0x00U;
742 		dllgaintv = 0x02U;
743 	}
744 
745 	/*
746 	 * lcdlseed= (1000000/(2*memck_freq)) * (100/(120*(stepsize_nominal)));
747 	 * *100/105 is to bias the seed low.
748 	 */
749 	lcdlseed = (1000000U * 10U * 100U) / (2U * memck_freq * stepsize_x10 * 105U);
750 
751 	if (lcdlseed > (511U - 32U)) {
752 		lcdlseed = 511U - 32U;
753 	}
754 
755 	if (lcdlseed < 32U) {
756 		lcdlseed = 32U;
757 	}
758 
759 	wddllgainctl = (uint16_t)((CSR_DLLGAINTV_MASK & (dllgaintv << CSR_DLLGAINTV_LSB)) |
760 				  (CSR_DLLGAINIV_MASK & (dllgainiv << CSR_DLLGAINIV_LSB)));
761 
762 	mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (TMASTER | CSR_DLLGAINCTL_ADDR))),
763 		      wddllgainctl);
764 
765 	wddlllockparam = (uint16_t)((CSR_LCDLSEED0_MASK & (lcdlseed << CSR_LCDLSEED0_LSB)) |
766 				    (CSR_DISDLLGAINIVSEED_MASK & 0xFFFFU));
767 
768 	mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (TMASTER | CSR_DLLLOCKPARAM_ADDR))),
769 		      wddlllockparam);
770 }
771 
772 /*
773  * Program AcsmCtrl23 for Fw and Ppt.
774  *
775  * CSRs to program:
776  *   AcsmCtrl23::AcsmCsMask
777  *               AcsmCsMode
778  */
779 static void acsmctrl23_program(void)
780 {
781 	uint16_t regdata;
782 
783 	regdata = (0x0FU << CSR_ACSMCSMASK_LSB) | (0x1U << CSR_ACSMCSMODE_LSB);
784 	mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (C0 | TACSM | CSR_ACSMCTRL23_ADDR))),
785 		      regdata);
786 }
787 
788 /*
789  * Set PllForceCal to 1 and PllDacValIn to some arbitrary value
790  */
791 static void pllforcecal_plldacvalin_program(void)
792 {
793 	uint32_t dacval_in = 0x10U;
794 	uint32_t force_cal = 0x1U;
795 	uint32_t pllencal = 0x1U;
796 	uint32_t maxrange = 0x1FU;
797 	uint16_t pllctrl3_gpr;
798 	uint16_t pllctrl3_startup;
799 
800 	pllctrl3_startup = (uint16_t)((dacval_in << CSR_PLLDACVALIN_LSB) |
801 				      (maxrange << CSR_PLLMAXRANGE_LSB));
802 	pllctrl3_gpr = pllctrl3_startup | (uint16_t)((force_cal << CSR_PLLFORCECAL_LSB) |
803 						     (pllencal << CSR_PLLENCAL_LSB));
804 
805 	mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (TMASTER | CSR_PLLCTRL3_ADDR))),
806 		      pllctrl3_startup);
807 	mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (TINITENG | CSR_SEQ0BGPR6_ADDR))),
808 		      pllctrl3_gpr);
809 }
810 
811 /*
812  * This function programs registers that are normally set by training
813  * firmware.
814  *
815  * This function is used in place of running 1D or 1D training steps. PhyInit
816  * calls this function when skip_train = true. In that case, PhyInit does not
817  * execute training firmware and this function is called instead to program
818  * PHY registers according to DRAM timing parameters specified in userInput
819  * data structure. See documentation of ddrphy_phyinit_struct.h file
820  * details of timing parameters available in skip training.
821  *
822  * \warning ddrphy_phyinit_progcsrskiptrain() only supports zero board
823  * delay model. If system board delays are set or randomized, full 1D or 1D
824  * initialization flow must be executed.
825  *
826  * This function replaces these steps in the PHY Initialization sequence:
827  *  - (E) Set the PHY input clocks to the desired frequency
828  *  - (F) Write the Message Block parameters for the training firmware
829  *  - (G) Execute the Training Firmware
830  *  - (H) Read the Message Block results
831  *
832  * \returns \c void
833  */
834 void ddrphy_phyinit_progcsrskiptrain(struct stm32mp_ddr_config *config,
835 				     struct pmu_smb_ddr_1d *mb_ddr_1d, uint32_t ardptrinitval)
836 {
837 	uint16_t txdqsdly;
838 
839 	/*
840 	 * Program ATxDlY
841 	 * For DDR4, DDR3 and LPDDR4, leave AtxDly[6:0] at default (0x0)
842 	 */
843 
844 	dfimrl_program(config, mb_ddr_1d, ardptrinitval);
845 
846 	txdqsdlytg_program(config, mb_ddr_1d, &txdqsdly);
847 
848 	txdqdlytg_program(config, mb_ddr_1d, txdqsdly);
849 
850 	rxendly_program(config, mb_ddr_1d);
851 
852 #if STM32MP_LPDDR4_TYPE
853 	seq0bgpr_program(config);
854 
855 	hwtlpcsen_program(config);
856 
857 	pptdqscntinvtrntg_program(config);
858 
859 	pptctlstatic_program(config);
860 #endif /* STM32MP_LPDDR4_TYPE */
861 
862 	hwtcamode_program();
863 
864 	dllgainctl_dlllockparam_program(config);
865 
866 	acsmctrl23_program();
867 
868 	pllforcecal_plldacvalin_program();
869 
870 	/*
871 	 * ##############################################################
872 	 *
873 	 * Setting PhyInLP3 to 0 to cause PIE to execute LP2 sequence instead of INIT on first
874 	 * dfi_init_start.
875 	 * This prevents any DRAM commands before DRAM is initialized, which is the case for
876 	 * skip_train.
877 	 *
878 	 * Moved to here from dddrphy_phyinit_I_loadPIEImage()
879 	 * These should not be needed on S3-exit
880 	 *
881 	 * Note this executes for SkipTrain only, *not* DevInit+SkipTrain
882 	 * DevInit+SkipTrain already initializes DRAM and thus don't need to avoid DRAM commands
883 	 *
884 	 * ##############################################################
885 	 */
886 
887 	/*
888 	 * Special skipTraining configuration to Prevent DRAM Commands on the first dfi
889 	 * status interface handshake. In order to see this behavior, the first dfi_freq
890 	 * should be in the range of 0x0f < dfi_freq_sel[4:0] < 0x14.
891 	 */
892 	mmio_write_16((uintptr_t)(DDRPHYC_BASE + (4U * (TINITENG | CSR_PHYINLP3_ADDR))), 0x0U);
893 }
894