xref: /rk3399_ARM-atf/drivers/ti/clk/ti_pll.c (revision a28114d66a6d43db4accef5fd5d6dab6c059e584)
1 /*
2  * Copyright (c) 2025-2026 Texas Instruments Incorporated - https://www.ti.com
3  *
4  * SPDX-License-Identifier: BSD-3-Clause
5  */
6 
7 /*
8  * TI Generic PLL Driver
9  *
10  * This driver provides common PLL (Phase-Locked Loop) frequency calculation
11  * and configuration algorithms shared across different PLL variants (16FFT,
12  * deskew, etc.). It implements the core PLL math for determining optimal
13  * divider and multiplier values to achieve target frequencies within VCO
14  * constraints, supporting both integer and fractional frequency synthesis.
15  */
16 
17 #include <assert.h>
18 #include <errno.h>
19 #include <limits.h>
20 
21 #include <ti_clk_pll.h>
22 #include <ti_container_of.h>
23 
/**
 * struct ti_pll_consider_data - Working state for a PLL parameter search.
 * @clk: Clock instance handed to the PLL driver callbacks.
 * @data: PLL driver data (callbacks, pllfm width, divider/multiplier limits).
 * @pll: PLL clock description; read for the fractional_support flag.
 * @vco: Allowed VCO frequency range (min_hz/max_hz).
 * @vco_in: Allowed VCO input frequency range (min_hz/max_hz).
 * @input: PLL input frequency.
 * @output: Requested output frequency.
 * @min: Lowest acceptable output frequency.
 * @max: Highest acceptable output frequency.
 * @curr_plld: Input divider of the combination currently being evaluated.
 * @curr_clkod: Output divider of the combination currently being evaluated.
 * @clkod_plld: clkod * plld.
 * @vco_min: Max of the VCO minimum and (output minimum * clkod).
 * @vco_max: Min of the VCO maximum and (output maximum * clkod).
 * @min_delta: Whole part of the distance between @output and the best match.
 * @min_delta_rem: Fractional part of that distance, as a numerator over
 *                 @min_delta_div.
 * @min_delta_div: Divisor that @min_delta_rem is expressed in.
 * @max_fitness: VCO fitness reported by the driver for the best match.
 * @max_bin: Bin reported by the driver for the best match.
 * @best_actual: Output frequency produced by the best match.
 * @plld: Result storage for the best input divider.
 * @pllm: Result storage for the best integer multiplier.
 * @pllfm: Result storage for the best fractional multiplier.
 * @clkod: Result storage for the best output divider.
 */
struct ti_pll_consider_data {
	/* Search inputs, fixed for the duration of one calculation */
	struct ti_clk *clk;
	const struct ti_pll_data *data;
	const struct ti_clk_data_pll *pll;
	const struct ti_clk_range *vco;
	const struct ti_clk_range *vco_in;
	uint32_t input;
	uint32_t output;
	uint32_t min;
	uint32_t max;

	/* Candidate currently under evaluation */
	uint32_t curr_plld;
	uint32_t curr_clkod;
	uint32_t clkod_plld;
	uint32_t vco_min;
	uint32_t vco_max;

	/* Best match found so far */
	uint32_t min_delta;
	uint32_t min_delta_rem;
	uint32_t min_delta_div;
	uint32_t max_fitness;
	int32_t max_bin;
	uint32_t best_actual;

	/* Where the winning settings are written */
	uint32_t *plld;
	uint32_t *pllm;
	uint32_t *pllfm;
	uint32_t *clkod;
};
57 
/* Outcome of evaluating one pllm candidate. */
enum consider_result {
	/* No result stored yet */
	PLLM_NO_VALUE = 0,
	/* The value was skipped without being tested */
	PLLM_UNTESTED,
	/* The pllm value was too high */
	PLLM_HIGH,
	/* The pllm value was too low */
	PLLM_LOW,
	/* The pllm value produced a frequency within limits */
	PLLM_OK,
};
65 
66 /**
67  * ti_pll_consider_entry() - Evaluate a PLL table entry as a candidate frequency.
68  * @data: PLL search state including target, min, max and best-match tracking.
69  * @entry: A single PLL table entry with fixed plld/pllm/pllfm/clkod values.
70  *
71  * Consider the given pll table entry as a new possible best match. Make sure
72  * our desired output frequency falls within the table's given frequency range
73  * and that the actual frequency produced falls within the desired output
74  * frequency range. Note that no VCO testing is done. PLL table entries are
75  * permitted to violate PLL specifications.
76  */
ti_pll_consider_entry(struct ti_pll_consider_data * data,const struct ti_pll_table_entry * entry)77 static void ti_pll_consider_entry(struct ti_pll_consider_data *data,
78 				  const struct ti_pll_table_entry *entry)
79 {
80 	uint64_t rem64;
81 	uint32_t rem;
82 	uint64_t actual64;
83 	uint32_t actual;
84 	uint32_t clkod_plld;
85 
86 	/* Check if desired output is in given range for PLL table entry. */
87 	if ((data->output < entry->freq_min_hz) ||
88 	    (data->output > entry->freq_max_hz)) {
89 		return;
90 	}
91 
92 	/*
93 	 * Determine actual frequency given table entry would produce:
94 	 *
95 	 * actual = input * pllm / (plld * clkod)
96 	 */
97 	clkod_plld = entry->plld * entry->clkod;
98 
99 	actual64 = ((uint64_t)data->input * entry->pllm) / clkod_plld;
100 	rem64 = ((uint64_t)data->input * entry->pllm) % clkod_plld;
101 	rem = (uint32_t)rem64;
102 
103 	if (entry->pllfm != 0U) {
104 		uint64_t fret;
105 		uint64_t frem;
106 		uint32_t stride = 1U;
107 		uint32_t pllfm_bits;
108 		uint32_t pllfm_range;
109 		uint32_t pllfm_mask;
110 
111 		pllfm_bits = data->data->pllfm_bits;
112 		pllfm_range = (uint32_t) (1U << pllfm_bits);
113 		pllfm_mask = pllfm_range - 1U;
114 
115 		if (data->data->pllm_stride != NULL) {
116 			stride = data->data->pllm_stride(data->clk, entry->pllm);
117 		}
118 
119 		/* Calculate fractional component of frequency */
120 		fret = ((uint64_t)data->input * entry->pllfm) / clkod_plld;
121 		frem = ((uint64_t)data->input * entry->pllfm) % clkod_plld;
122 
123 		fret *= stride;
124 		frem *= stride;
125 		fret += (uint32_t)frem / clkod_plld;
126 		frem = (uint32_t)frem % clkod_plld;
127 
128 		frem += (uint32_t)(fret & pllfm_mask) * clkod_plld;
129 
130 		actual64 += fret >> pllfm_bits;
131 		rem += (uint32_t)(frem >> pllfm_bits);
132 
133 		actual64 += (uint32_t)rem / clkod_plld;
134 		rem = (uint32_t)rem % clkod_plld;
135 	}
136 
137 	actual = (uint32_t)actual64;
138 
139 	/*
140 	 * Check if output is within our allowable bounds. Don't round
141 	 * up, accept anything up to but not including max + 1.
142 	 */
143 	if ((actual < data->min) || (actual > data->max)) {
144 		return;
145 	}
146 
147 	/* Check how close we got */
148 	uint32_t delta;
149 	uint32_t delta_rem;
150 
151 	if (data->output > actual) {
152 		delta = data->output - actual;
153 		if (rem != 0U) {
154 			delta_rem = clkod_plld - rem;
155 			delta--;
156 		} else {
157 			delta_rem = 0U;
158 		}
159 	} else {
160 		delta = actual - data->output;
161 		delta_rem = rem;
162 	}
163 
164 	if ((delta < data->min_delta) ||
165 	    ((delta == data->min_delta) && (data->min_delta_rem != 0U) &&
166 	     ((delta_rem * data->min_delta_div) < (data->min_delta_rem * clkod_plld)))) {
167 		*data->pllm = entry->pllm;
168 		*data->pllfm = entry->pllfm;
169 		*data->plld = entry->plld;
170 		*data->clkod = entry->clkod;
171 		data->min_delta = delta;
172 		data->min_delta_rem = delta_rem;
173 		data->min_delta_div = clkod_plld;
174 		data->best_actual = actual;
175 	}
176 }
177 
178 /**
179  * ti_pll_consider() - Evaluate a plld/pllm/pllfm/clkod combination as a best match candidate.
180  * @data: PLL search state including target, min, max and best-match tracking.
181  * @curr_pllm: Integer PLL multiplier value to evaluate.
182  * @curr_pllfm: Fractional PLL multiplier value to evaluate.
183  * @stride: Internal multiplier stride for fractional calculations.
184  *
185  * Make sure it produces a valid VCO frequency and a valid output frequency. If
186  * it's better than the current best match, store it as the new best match.
187  *
188  * Return: PLLM_OK if the combination is valid, PLLM_LOW/PLLM_HIGH if out of
189  *         VCO range, PLLM_UNTESTED if skipped due to bin preference.
190  */
ti_pll_consider(struct ti_pll_consider_data * data,uint32_t curr_pllm,uint32_t curr_pllfm,uint32_t stride)191 static enum consider_result ti_pll_consider(struct ti_pll_consider_data *data,
192 					    uint32_t curr_pllm,
193 					    uint32_t curr_pllfm, uint32_t stride)
194 {
195 	uint32_t rem;
196 	uint64_t result64;
197 	uint32_t vco;
198 	int32_t bin;
199 	uint32_t div;
200 	uint64_t fret;
201 	uint64_t frem;
202 	uint32_t mask;
203 	uint32_t bits;
204 	uint32_t actual;
205 	uint32_t delta;
206 	uint32_t delta_rem;
207 	uint32_t fitness;
208 	uint32_t a;
209 	uint32_t b;
210 	bool better;
211 	bool same;
212 
213 	bin = data->data->bin(data->clk, data->curr_plld, curr_pllm,
214 			      curr_pllfm != 0U, data->curr_clkod);
215 
216 	/* Always ignore worse bins. */
217 	if (bin < data->max_bin) {
218 		return PLLM_UNTESTED;
219 	}
220 
221 	div = data->curr_plld;
222 
223 	/*
224 	 * Calculate the VCO frequency this combination will produce:
225 	 *
226 	 * vco = input * pllm / plld
227 	 */
228 	result64 = ((uint64_t)data->input * curr_pllm) / div;
229 	rem = (uint32_t)(((uint64_t)data->input * curr_pllm) % div);
230 
231 	if (curr_pllfm != 0U) {
232 		bits = data->data->pllfm_bits;
233 		mask = (uint32_t) (1U << bits) - 1U;
234 
235 		/* Calculate fractional part of frequency */
236 		fret = ((uint64_t)data->input * curr_pllfm) / div;
237 		frem = ((uint64_t)data->input * curr_pllfm) % div;
238 
239 		/* Fold in multiplier */
240 		fret *= stride;
241 		frem *= stride;
242 		fret += (uint32_t)frem / div;
243 		frem = (uint32_t)frem % div;
244 
245 		/* Fold fractional part back into remainder */
246 		frem += (fret & (uint64_t) mask) * div;
247 
248 		/* Add integer part */
249 		result64 += fret >> bits;
250 		rem += (uint32_t) (frem >> bits);
251 
252 		result64 += (uint64_t) rem / div;
253 		rem = rem % div;
254 	}
255 
256 	/* Make sure our result at least fits within 32-bits */
257 	vco = (uint32_t) result64;
258 
259 	/* Round down for min check. */
260 	if (vco < data->vco_min) {
261 		return PLLM_LOW;
262 	}
263 
264 	if (vco > data->vco_max) {
265 		/*
266 		 * Normally round down for max check (ignore remainder).
267 		 * This allows all frequencies up to but not including
268 		 * +1.
269 		 */
270 		return PLLM_HIGH;
271 	}
272 
273 	if ((vco == data->vco->max_hz) && (rem != 0U)) {
274 		/*
275 		 * If we are at the vco limit and not just the output
276 		 * limit, round up, don't allow any remainder.
277 		 */
278 		return PLLM_HIGH;
279 	}
280 
281 	/*
282 	 * Calculate the actual frequency produced:
283 	 *
284 	 * actual = input * pllm / (clkod * plld)
285 	 * actual = vco / clkod
286 	 *
287 	 * We already have the vco frequency from above, so we
288 	 * simply use that as a starting point.
289 	 */
290 	actual = vco / data->curr_clkod;
291 	rem += (vco % data->curr_clkod) * div;
292 	div *= data->curr_clkod;
293 	if (rem >= div) {
294 		actual++;
295 		rem -= div;
296 	}
297 
298 	/* Check how close we got. */
299 	if (data->output > actual) {
300 		delta = data->output - actual;
301 		if (rem != 0U) {
302 			delta_rem = div - rem;
303 			delta--;
304 		} else {
305 			delta_rem = 0U;
306 		}
307 	} else {
308 		delta = actual - data->output;
309 		delta_rem = rem;
310 	}
311 
312 	better = false;
313 	same = false;
314 	if (delta < data->min_delta) {
315 		/* Easy, clear winner */
316 		better = true;
317 	} else if (delta == data->min_delta) {
318 		if ((delta_rem == 0U) && (data->min_delta_rem == 0U)) {
319 			/* Both are bang on at same delta */
320 			same = true;
321 		} else if (delta_rem == 0U) {
322 			/*
323 			 * Other delta has a remainder, but we don't,
324 			 * we are closer.
325 			 */
326 			better = true;
327 		} else if (data->min_delta_rem != 0U) {
328 			/*
329 			 * We both have remainders, cross multiply
330 			 * and compare result to determine same, better,
331 			 * or worse match.
332 			 *
333 			 * An exception to this is if both values are
334 			 * within 1 of the target. In this case we
335 			 * always prefer the one above the target.
336 			 * This ensures that if they program 50Hz and
337 			 * we can return 49.9Hz or 50.9Hz, we return
338 			 * 50Hz. Otherwise if they then decided to
339 			 * program 49Hz they would likely get 48.9Hz,
340 			 * and so on.
341 			 */
342 			if ((delta == 0U) && (data->best_actual != actual)) {
343 				/* One result at target, one at target - 1 */
344 				if (actual == data->output) {
345 					/* Our result is above, we are better */
346 					better = true;
347 				} else {
348 					/* Other result is above, they are better */
349 				}
350 			} else {
351 				a = delta_rem * data->min_delta_div;
352 				b = data->min_delta_rem * div;
353 
354 				if (a < b) {
355 					better = true;
356 				} else if (a == b) {
357 					same = true;
358 				} else {
359 					/* Do Nothing */
360 				}
361 			}
362 		} else {
363 			/* Do Nothing */
364 		}
365 	} else {
366 		/* Do Nothing */
367 	}
368 
369 	/*
370 	 * Update the winner if:
371 	 * - bin is better, OR
372 	 * - bin is same and delta is better, OR
373 	 * - bin and delta are same and fitness is better
374 	 */
375 	if (!((bin == data->max_bin) && !same && !better)) {
376 		fitness = data->data->vco_fitness(data->clk, vco, curr_pllfm != 0U);
377 
378 		if (!((bin == data->max_bin) && same && (fitness <= data->max_fitness))) {
379 			/*
380 			 * New winner. bin is better, or bin is the
381 			 * same and delta is better, or bin and delta
382 			 * are the same and fitness is better.
383 			 */
384 			*data->plld = data->curr_plld;
385 			*data->pllm = curr_pllm;
386 			*data->pllfm = curr_pllfm;
387 			*data->clkod = data->curr_clkod;
388 			data->max_bin = bin;
389 			data->min_delta = delta;
390 			data->min_delta_rem = delta_rem;
391 			data->min_delta_div = div;
392 			data->max_fitness = fitness;
393 			data->best_actual = actual;
394 		}
395 	}
396 	return PLLM_OK;
397 }
398 
ti_pll_consider_fractional(struct ti_pll_consider_data * data,uint32_t curr_pllm,uint32_t pllm_rem,uint32_t pllfm_estimate,uint32_t stride)399 static inline void ti_pll_consider_fractional(struct ti_pll_consider_data *data,
400 					      uint32_t curr_pllm,
401 					      uint32_t pllm_rem,
402 					      uint32_t pllfm_estimate,
403 					      uint32_t stride)
404 {
405 	uint32_t pllfm_range;
406 	uint32_t lowest_pllfm;
407 	uint32_t highest_pllfm;
408 	uint64_t pllfm_input;
409 	uint64_t best_delta;
410 	uint32_t best_pllfm;
411 	uint32_t curr_pllfm;
412 	uint32_t extra;
413 	uint64_t delta;
414 	bool found_best;
415 	uint64_t rem_target;
416 
417 	pllfm_range = (uint32_t) (1U << data->data->pllfm_bits);
418 
419 	/* Because it's an estimate, we must check +/- 1 */
420 	lowest_pllfm = pllfm_estimate - 1U;
421 	highest_pllfm = pllfm_estimate + 1U;
422 
423 	/*
424 	 * Clip at bonudaries. Note that we test the values
425 	 * associated with pll frac disabled and with the
426 	 * next pllm value. If either value is the closest
427 	 * value, we reject the configuration since the
428 	 * non-frac ti_pll_consider call will handle it.
429 	 */
430 	if (pllfm_estimate == 0U) {
431 		lowest_pllfm = 0U;
432 	}
433 	if (pllfm_estimate > pllfm_range) {
434 		highest_pllfm = pllfm_range;
435 	}
436 
437 	/*
438 	 * Exception to the above. If curr_pllm+stride is not a valid
439 	 * multiplier, ignore the value 1 past the max register value
440 	 * above as it *cannot* be handled by the non-frac case.
441 	 */
442 	if (pllfm_estimate == pllfm_range) {
443 		if ((curr_pllm == data->data->pllm_max) ||
444 		    !((data->data->pllm_valid == NULL) ||
445 		      data->data->pllm_valid(data->clk, curr_pllm + stride, false))) {
446 			highest_pllfm--;
447 		}
448 	}
449 
450 	/*
451 	 * Additional exception. If the curr_pllm+stride value produces an
452 	 * invalid frequency, it cannot be handled by the non-frac case. This
453 	 * should be an uncommon case.
454 	 */
455 	if (pllfm_estimate == pllfm_range) {
456 		uint32_t test_pllm;
457 		uint64_t test_vco;
458 		uint64_t test_vco_rem;
459 
460 		test_pllm = curr_pllm + stride;
461 
462 		test_vco = (uint64_t)(data->input / data->curr_plld) * test_pllm;
463 		test_vco_rem = (uint64_t)(data->input % data->curr_plld) * test_pllm;
464 		extra = (uint32_t)test_vco_rem / test_pllm;
465 
466 		test_vco += extra;
467 		test_vco_rem -= (uint64_t)(extra * test_pllm);
468 		if (test_vco > data->vco_max) {
469 			highest_pllfm--;
470 		} else if ((test_vco == data->vco->max_hz) && (test_vco_rem != 0U)) {
471 			highest_pllfm--;
472 		} else {
473 			/* Do Nothing */
474 		}
475 	}
476 
477 	/*
478 	 * Find the value that gets closest to the actual remainder.
479 	 *
480 	 * remainder = pllfm * input / (pllfm range * plld)
481 	 * remainder * plld * pllfm range = pllfm * input
482 	 *
483 	 * Note that pllm_rem is already multiplied by plld, so:
484 	 *
485 	 * pllm_rem * pllfm range = pllfm * input
486 	 */
487 	rem_target = ((uint64_t) pllm_rem) * (1ULL << data->data->pllfm_bits);
488 
489 	/* Start at the lowest and walk it up to the highest */
490 	pllfm_input = ((uint64_t) lowest_pllfm) * data->input;
491 
492 	found_best = false;
493 	best_delta = 0ULL;
494 	best_pllfm = 0U;
495 
496 	for (curr_pllfm = lowest_pllfm; curr_pllfm <= highest_pllfm; curr_pllfm++) {
497 		/* Find out how far we are off */
498 		if (rem_target >= pllfm_input) {
499 			delta = rem_target - pllfm_input;
500 		} else {
501 			delta = pllfm_input - rem_target;
502 		}
503 
504 		/* See if it's better than our best */
505 		if (!found_best || (delta < best_delta)) {
506 			best_delta = delta;
507 			found_best = true;
508 			best_pllfm = curr_pllfm;
509 		}
510 
511 		/* pllfm_input = pllfm * input */
512 		pllfm_input += data->input;
513 	}
514 
515 	if (found_best) {
516 		if ((best_pllfm == 0U) || (best_pllfm == pllfm_range)) {
517 			/*
518 			 * Ignore the result, we will be better served by a
519 			 * whole number pllm result.
520 			 */
521 			found_best = false;
522 		}
523 	}
524 
525 	if (found_best) {
526 		(void)ti_pll_consider(data, curr_pllm, best_pllfm, stride);
527 	}
528 }
529 
530 /**
531  * ti_pll_find_pllm() - Find floor and ceiling pllm values around an ideal value.
532  * @clkp: The PLL clock instance.
533  * @data: PLL driver data containing valid-pllm callback and pllm_max.
534  * @ideal: The ideal integer pllm value.
535  * @remainder: Non-zero if the ideal pllm has a fractional component.
536  * @low: Output: largest valid pllm at or below ideal, or 0 if none.
537  * @high: Output: smallest valid pllm at or above ideal, or 0 if none.
538  *
539  * Given an ideal pllm value, calculate a floor result (low) and a ceiling
540  * result (high). Both the low and high values are walked up/down until they
541  * find the first valid pllm value. If no such value exists, they return 0.
542  */
ti_pll_find_pllm(struct ti_clk * clkp,const struct ti_pll_data * data,uint32_t ideal,uint32_t remainder,uint32_t * low,uint32_t * high)543 static inline void ti_pll_find_pllm(struct ti_clk *clkp, const struct ti_pll_data *data,
544 				    uint32_t ideal, uint32_t remainder,
545 				    uint32_t *low, uint32_t *high)
546 {
547 	*low = 0U;
548 	*high = 0U;
549 
550 	if (ideal >= data->pllm_max) {
551 		/*
552 		 * The ideal pllm value is above the range of valid pllm
553 		 * values. We can walk it lower for binning, but we can't
554 		 * walk it any higher.
555 		 */
556 		*low = data->pllm_max;
557 		*high = 0U;
558 	} else if (ideal == 0U) {
559 		/* pllm cannot be zero. Start at 1 and walk up if necessary */
560 		*low = 0U;
561 		*high = 1U;
562 	} else {
563 		/*
564 		 * The output frequency of a PLL is calculated by:
565 		 *
566 		 * output = input * pllm / (plld * clkod)
567 		 *
568 		 * We solve for pllm and look for the floor and
569 		 * ceiling result:
570 		 *
571 		 * low = floor(output * clkod * plld / input)
572 		 * high = ceil(output * clkod * plld / input)
573 		 *
574 		 * If there is low fractional component in the result,
575 		 * low == high.
576 		 */
577 		*low = ideal;
578 		*high = ideal + remainder;
579 	}
580 
581 	/* Walk pllm down to find the first valid value */
582 	while ((data->pllm_valid != NULL) && !data->pllm_valid(clkp, *low, false)) {
583 		if (0U == (--(*low))) {
584 			/*
585 			 * Walked off the end of valid values, low = 0
586 			 * indicades no valid low value.
587 			 */
588 			break;
589 		}
590 	}
591 
592 	/* Walk pllm up to find the second valid value */
593 	while ((data->pllm_valid != NULL) && !data->pllm_valid(clkp, *high, false)) {
594 		if ((*high)++ == data->pllm_max) {
595 			/*
596 			 * Walked off the end of valid values, set high = 0
597 			 * to indicate no valid high value.
598 			 */
599 			*high = 0U;
600 			break;
601 		}
602 	}
603 }
604 
605 /**
606  * ti_pll_internal_calc() - Calculate ideal plld/pllm/clkod values for a given input frequency.
607  * @consider_data: PLL search state including input/output frequencies, VCO constraints,
608  *                 and storage for the best plld, pllm, pllfm, and clkod results.
609  *
610  * Calculate ideal plld/pllm/clkod values for a given input frequency and
611  * desired output frequency. The core component of a PLL is the Voltage
612  * Controlled Oscillator (VCO). The PLL adjusts the frequency of the VCO
613  * so that it counts out a specified number of clock cycles for each input
614  * clock cycle. Most PLLs produce a cleaner output for higher VCO values.
615  *
616  * The number of VCO output cycles counted per input tick is referred to
617  * as the PLL multiplier, or pllm. Typical PLLs permit a wide range of
618  * values, from 1 to a value of hundreds or often thousands. Lower
619  * multiplier values are typically preferred for better PLL performance.
620  *
621  * In order to produce a wider range of possible frequencies, PLLs also
622  * provide an input divider, plld. The input clock is divided before
623  * providing synchronization ticks for the VCO.
624  *
625  * VCOs typically must run at high frequencies (hundreds of MHz, with a max
626  * generally in the GHz range), usually above the desired output frequency.
627  * For this reason, PLLs contain clock output dividers that divide the VCO
628  * output frequency.
629  *
630  * Because the performance requirements of each PLL is different, we allow
631  * the PLL driver to compute the fitness of a given VCO frequency and prefer
632  * higher fitness values for two sets of plld/pllm/clkod values that produce
633  * the same frequency.
634  *
635  * We also allow the PLL driver to bin results. Binning is currently used to
636  * prefer all pllm values below 512 to higher values. This is because the
637  * hardware team has indicated that for our current PLLs, values 512 and
638  * above should only be used if absolutely necessary to reach a specific
639  * frequency tolerance.
640  *
641  * The general algorithm of the function is for each plld/clkod combination
642  * we find the best pllm value. For our current PLLs, plld can range from 1
643  * to 64, and clkod from 1 to 16 excluding odd values except for 1. This
644  * gives 576 possible combinations.
645  *
646  * The range of plld values is typically limited to a much smaller range due
647  * to the input frequency / plld needing to remain above the minimum VCO
648  * input frequency. For instance, with a PLL input frequency of 24MHz and a
649  * minimum VCO input frequency of 3.2MHz, the maximum plld value is 7. This
650  * reduces the number of plld/clkod combinations to 63 in common practice.
651  *
652  * The clkod can be limited by comparing the output frequency range to the
653  * VCO frequency range. If there is no output frequency range (0 to
654  * ULONG_MAX) then the full clkod range must be tested. However, if the
655  * HLOS passes a frequency range of 95MHz to 100MHz, and the VCO range
656  * is 700MHz to 4.2GHz, then the allowable clkod range becomes 8 to
657  * 16, 5 values. This reduces the number of plld/clkod combinations down
658  * to 35.
659  *
660  * For each clkod value, we produce an ideal VCO value. Then, for each
661  * plld value, we find a pllm value that will produce that VCO value.
662  * Because the given pllm/plld combination will likely produce a value
663  * not exactly equal to the desired VCO value, we usually test two pllm
664  * values. One that produces the frequency below the target VCO value, and
665  * one that produces the frequency above the target VCO value.
666  *
667  * In order to find a value that is within the best bin, we ask the PLL
668  * driver to provide us with additional pllm values. For each pllm value
669  * we test, we ask the driver if another pllm value exists in another
670  * bin either above or below the current pllm value. We repeat this
671  * process until the driver indicates no more such values exist. Our
672  * current PLL drivers only have two bins. One for values below 512, bin 1,
673  * and one for higher values, bin 0. In practice this means if our low
674  * pllm value is in bin 0, we will also test 511, which is in bin 1.
675  *
676  * The pll_calc function will also test if a PLL table entry exists for
677  * the desired output frequency and produces a frequency within the desired
678  * output range given the input frequency. If so, it will just return the
679  * table entry rather than attempting to calculate plld/pllm/clkod values.
680  *
681  * Note on 32/64 bit math:
682  *
683  * div64 can be very expensive compared to a simple udiv instruction.
684  * Avoid using it by splitting up the division into two parts:
685  *
686  * (A*B)/C = (A/C) * B
687  *
688  * The LHS division is likely to require 64 bit division, but the RHS
689  * will not require 64 bit division (assuming A and C are 32 bits).
690  * However, precision is lost if integer division is done. Therefore
691  * the division needs to be split up further so the remainder can
692  * be tracked.
693  *
694  * integer part = A//C
695  * remainder = A%C
696  *
697  * Those values can be recombined to form the original result:
698  *
699  * A/C = A//C + (A%C)/C
700  *
701  * We can then mix the B term in:
702  *
703  * (A/C)*B = (A//C) * B + ((A%C) * B) / C
704  *
705  * The final division may be larger than 32 bits if
706  * (C-1)*Bmax > ULONG_MAX. This can easily be checked. In
707  * our case, this is the combination of the plld, pllm, and clkod
708  * values which fits within 32 bits.
709  *
710  * A check is added in case the final division does need 64
711  * bits due to programming error or unexpected input.
712  */
ti_pll_internal_calc(struct ti_pll_consider_data * consider_data)713 static inline void ti_pll_internal_calc(struct ti_pll_consider_data *consider_data)
714 {
715 	struct ti_clk *clkp = consider_data->clk;
716 	const struct ti_pll_data *data = consider_data->data;
717 	uint32_t lowest_clkod;
718 	uint32_t highest_clkod;
719 	uint32_t clkod;
720 	uint32_t lowest_plld;
721 	uint32_t highest_plld;
722 	uint32_t vco_target;
723 	uint64_t input_inverse64 = 0U;
724 	uint32_t estimate_pllfm;
725 	uint32_t stride;
726 	uint64_t frem;
727 	bool do_frac;
728 
729 	/*
730 	 * Find allowable plld range. plld is the PLL input divider. Valid
731 	 * plld values are constrained by the PLL limitations on plld and
732 	 * also by the input frequency limits of the VCO:
733 	 *
734 	 * VCO input frequency = input frequency / plld
735 	 *
736 	 * To calculate our valid range, we use:
737 	 *
738 	 * lowest plld = ceil(input frequency / VCO input maximum)
739 	 * highest plld = floor(input frequency / VCO input minimum)
740 	 *
741 	 * We then clip lowest/highest to the PLL's plld range. Note that
742 	 * after clipping, lowest_plld may be larger than highest_plld, in
743 	 * which case there are no possible combinations and we will return
744 	 * 0.
745 	 */
746 	lowest_plld = 1U + ((consider_data->input - 1U) / consider_data->vco_in->max_hz);
747 	if (consider_data->vco_in->min_hz != 0U) {
748 		highest_plld = consider_data->input / consider_data->vco_in->min_hz;
749 	} else {
750 		highest_plld = data->plld_max;
751 	}
752 
753 	if (highest_plld > data->plld_max) {
754 		highest_plld = data->plld_max;
755 	}
756 
757 	/*
758 	 * Find allowable clkod range. clkod is the PLL output divider. Valid
759 	 * clkod values are constrained by the PLL limitations on clkod and
760 	 * also by the relationship between the maximum/minimum VCO
761 	 * frequencies and the maximum/minimum output frequency. The output
762 	 * frequency of a PLL is given by:
763 	 *
764 	 * output frequency = VCO frequency / clkod
765 	 *
766 	 * To calculate the range of possible clkod values, we use:
767 	 *
768 	 * lowest clkod = ceil(Minimum VCO freq / maximum output freq)
769 	 * highest clkod = floor(Maximum VCO freq / minimum output freq)
770 	 *
771 	 * Any lower dividers would not be able to divide the lowest possible
772 	 * VCO frequency enough to be at or below the maximum output
773 	 * frequency.
774 	 *
775 	 * Any higher dividers would divide the highest possible VCO frequency
776 	 * by too much and it would be below the minimum output frequency.
777 	 *
778 	 * Note that we want to accept all frequency up to but not including
779 	 * max + 1. For example, if the user gives a max frequency of 687Hz,
780 	 * we will accept 687.8Hz, but not 688Hz. We add one to the max
781 	 * frequency here to calculate for all frequencies up to and including
782 	 * max + 1, and then later reject exact matches to max + 1.
783 	 *
784 	 * We then clip lowest/highest to the PLL's clkod range. As with plld,
785 	 * the resulting lowest value may be above the highest value.
786 	 */
787 	if (consider_data->max == UINT_MAX) {
788 		/* Avoid divide by zero, result will always be one anyway */
789 		lowest_clkod = 1U;
790 	} else if (consider_data->vco->min_hz == 0U) {
791 		/* 0 is a legal value for vco min_hz */
792 		lowest_clkod = 0U;
793 	} else {
794 		lowest_clkod = 1U + ((consider_data->vco->min_hz - 1U) / (consider_data->max + 1U));
795 	}
796 	if (consider_data->min == 0U) {
797 		highest_clkod = data->clkod_max;
798 	} else {
799 		highest_clkod = consider_data->vco->max_hz / consider_data->min;
800 	}
801 
802 	/* Make sure clkod is in range */
803 	if (highest_clkod > data->clkod_max) {
804 		highest_clkod = data->clkod_max;
805 	}
806 	if (lowest_clkod == 0U) {
807 		lowest_clkod++;
808 	}
809 
810 	/*
811 	 * The PLL fractional multiplier can provide a much closer output
812 	 * result over a wide range of values. However, calculating it is
813 	 * potentially difficult especially when considering a wide range of
814 	 * values.
815 	 *
816 	 * The basic equation for finding the register value considers only the
817 	 * frequency range between pllm values, the possible range is thus
818 	 * input_freq * (pllm + 1) - input_freq * pllm which simplifies to
819 	 * input_freq. The relation is thus:
820 	 *
821 	 *	 fraction freq	   register value
822 	 *     ---------------- = ----------------
823 	 *	vco input freq	   register range
824 	 *
825 	 * And solving for the register value:
826 	 *
827 	 * reg = fractional freq * register range / vco input freq
828 	 *
829 	 * Note that register range and fraction freq range are typically of
830 	 * the order 2^24. Multiplying them both together requires a 64 bit
831 	 * type. Performing this 64-bit division for every possible pllm would
832 	 * be prohibitive.
833 	 *
834 	 * We instead use an approximation method. First we note the relation:
835 	 *
836 	 * vco input freq = input freq / plld
837 	 *
838 	 * We pre-calculate an inverse value using a single div64. We use a
839 	 * large numerator to retain as much precision as possible. The value
840 	 * chosen for this extra precision is 32 bits:
841 	 *
842 	 * inverse = register range * 2^32 / input freq
843 	 * vco input freq = input freq / plld
844 	 * input freq = vco input freq * plld
845 	 * inverse = register range * 2^32 / (vco input freq * plld)
846 	 * inverse = (register range / vco input freq) * (2^32 / plld)
847 	 *
848 	 * Note that the larger the input frequency, the smaller the inverse
849 	 * value and the smaller the input frequency, the larger the inverse
850 	 * value. Input frequencies at or below the register range produce a
851 	 * 64-bit value, input frequencies above the register range produce a
852 	 * 32-bit value. Due to this relation, there is no risk of a 64-bit
853 	 * overflow in the multiplication below.
854 	 *
855 	 * When we need to calculate the reg value, we use this inverse:
856 	 *
857 	 * reg = fractional freq * register range / vco input freq
858 	 * reg = fractional freq * inverse * plld / 2^32
859 	 *
860 	 * The upper 32 bits will contain the integer portion, and the lower
861 	 * 32 bits the fractional portion. Note that the result is not full
862 	 * precision because the remainder from the initial 64 bit division is
863 	 * not used in the calculation. The remainder can be as large as the
864 	 * input frequency using it would require another 64 bit division.
865 	 *
866 	 * The result is an estimate and may need to be adjusted to obtain the
867 	 * final value. The error can be shown as:
868 	 *
869 	 * error = fraction freq * (register range %% (vco input freq * plld)) /
870 	 *			(vco input freq * plld) * plld / 2^32
871 	 *
872 	 * Or a maximum value of:
873 	 *
874 	 * error = fraction freq * (vco input freq*plld-1) /
875 	 *			(vco input freq * plld) * plld / 2^32
876 	 *
877 	 * Or approximately:
878 	 *
879 	 * error = fraction freq * plld / 2^32
880 	 *
881 	 * This value will be less than 1 if fraction freq * plld < 2^32. Note
882 	 * that the maximum value of the fractional freq is the vco input
883 	 * freq - 1, or approximately input freq / plld. The expression can be
884 	 * more simply represented by:
885 	 *
886 	 * input freq < 2^32
887 	 *
888 	 * Thus the estimate is off by a fractional value and we only need to check
889 	 * the values directly above and below the estimate to check which value
890 	 * is closest.
891 	 */
892 	do_frac = (data->pllfm_bits != 0U) && consider_data->pll->fractional_support;
893 	if (do_frac) {
894 		uint64_t input_inverse_rem64;
895 
896 		input_inverse_rem64 = (uint64_t) 1U << data->pllfm_bits;
897 		input_inverse_rem64 <<= 32U;
898 		input_inverse64 = input_inverse_rem64 / consider_data->input;
899 		input_inverse_rem64 = input_inverse_rem64 % consider_data->input;
900 	}
901 
902 	for (clkod = lowest_clkod; clkod <= highest_clkod; clkod++) {
903 		uint32_t extra;
904 		uint32_t plld;
905 		uint32_t ideal_pllm;
906 		uint64_t ideal_pllm_rem;
907 		uint32_t ideal_pllm_step;
908 		uint32_t ideal_pllm_step_rem;
909 
910 		/*
911 		 * We can just compare the VCO value against the below
912 		 * generated limit to be sure it fits within the legal
913 		 * VCO limits and the allowable output frequency.
914 		 */
915 		consider_data->vco_min = CLAMP((uint64_t)consider_data->vco->min_hz,
916 					       (uint64_t)clkod * consider_data->min,
917 					       (uint64_t)UINT32_MAX);
918 
919 		uint64_t vco_max = (uint64_t)clkod * (consider_data->max + 1) - 1U;
920 
921 		consider_data->vco_max = MIN(vco_max, (uint64_t)consider_data->vco->max_hz);
922 
923 		if (data->clkod_valid && !data->clkod_valid(clkp, clkod)) {
924 			continue;
925 		}
926 
927 		consider_data->curr_clkod = clkod;
928 
929 		/*
930 		 * vco_target is our ideal VCO frequency for a given clkod value:
931 		 *
932 		 * output = VCO frequency / clkod
933 		 * VCO frequency = output * clkod
934 		 *
935 		 * Since we are incrementing clkod by one each loop, we can just do
936 		 * the multiplication once here with the initial clkod value and
937 		 * then add output each time we increment clkod. We start with
938 		 * the lowest_clkod - 1 to make the continue logic a bit easier.
939 		 */
940 		vco_target = CLAMP((uint64_t)consider_data->vco->min_hz,
941 				   (uint64_t)consider_data->output * clkod,
942 				   (uint64_t)consider_data->vco->max_hz);
943 
944 		/*
945 		 * How much to add to our ideal_pllm value and remainder
946 		 * each time we add 1 to plld.
947 		 * Note: clipped to ULONG_MAX by comparison with vco->max_hz
948 		 */
949 		ideal_pllm_step = vco_target / consider_data->input;
950 		ideal_pllm_step_rem = vco_target % consider_data->input;
951 
952 		/*
953 		 * Initial value is ideal_pllm_step * lowest_plld, we
954 		 * actually start at lowest_plld -1 to make the loop
955 		 * logic easier.
956 		 */
957 		ideal_pllm = ideal_pllm_step * (lowest_plld - 1U);
958 		ideal_pllm_rem = ideal_pllm_step_rem;
959 		ideal_pllm_rem *= (uint64_t) (lowest_plld - 1U);
960 
961 		/* Handle the carry */
962 		extra = ((uint32_t) ideal_pllm_rem) / consider_data->input;
963 		ideal_pllm += extra;
964 		ideal_pllm_rem -= ((uint64_t) extra) * consider_data->input;
965 
966 		for (plld = lowest_plld; plld <= highest_plld; plld++) {
967 			uint32_t low_pllm = 0U;
968 			uint32_t high_pllm = 0U;
969 			enum consider_result ret;
970 
971 			consider_data->curr_plld = plld;
972 
973 			/* pllm = vco_target * plld / input */
974 			ideal_pllm += ideal_pllm_step;
975 			ideal_pllm_rem += ideal_pllm_step_rem;
976 
977 			/* Handle the carry */
978 			if (ideal_pllm_rem >= consider_data->input) {
979 				ideal_pllm++;
980 				ideal_pllm_rem -= consider_data->input;
981 			}
982 
983 			consider_data->clkod_plld += clkod * plld;
984 
985 			if (data->plld_valid &&
986 			    !data->plld_valid(clkp, plld)) {
987 				continue;
988 			}
989 
990 			/*
991 			 * Calculate the closest pllm value. We already have
992 			 * our VCO frequency we are targeting given the
993 			 * current output frequency and clkod value. We can
994 			 * then determine the best pllm value given the input
995 			 * frequency and plld value:
996 			 *
997 			 * VCO frequency = input frequency * pllm / plld
998 			 * ideal pllm = VCO frequency * plld / input frequency
999 			 *
1000 			 * Along with the integer result, we get a remainder.
1001 			 * We use this to find the ceiling and floor
1002 			 * results.
1003 			 */
1004 
1005 			/*
1006 			 * Closest pllm values that will produce a frequency
1007 			 * just below and above our target.
1008 			 */
1009 			ti_pll_find_pllm(clkp, data, ideal_pllm,
1010 					 (ideal_pllm_rem != 0U) ? 1U : 0U,
1011 					 &low_pllm, &high_pllm);
1012 
1013 			/*
1014 			 * If we have a low_pllm value, it means the ideal pllm
1015 			 * integer multiplier produces a value below the ideal
1016 			 * frequency. A fractional addition can help.
1017 			 * Note that if the low_pllm is equal to the ideal_pllm
1018 			 * without a remainder, a fractional value won't get us
1019 			 * any closer.
1020 			 */
1021 			if (do_frac && (low_pllm != 0U) &&
1022 			    (low_pllm <= ideal_pllm) &&
1023 			    ((ideal_pllm_rem != 0U) ||
1024 			     (low_pllm != ideal_pllm)) &&
1025 			    ((data->pllm_valid == NULL) ||
1026 				data->pllm_valid(clkp, low_pllm, true))) {
1027 				/*
1028 				 * Some PLLs have an internal multiplier that
1029 				 * combines with the programmable multiplier.
1030 				 * Make sure to consider this when calculating
1031 				 * the register value. The stride will typically
1032 				 * be high_pllm - low_pllm.
1033 				 */
1034 				stride = 1U;
1035 				frem = ideal_pllm_rem;
1036 
1037 				if (data->pllm_stride != NULL) {
1038 					stride = data->pllm_stride(clkp,
1039 								   low_pllm);
1040 				}
1041 				if (stride != 1U) {
1042 					/*
1043 					 * Adjust the remainder according to our
1044 					 * position within the stride.
1045 					 */
1046 					frem += (ideal_pllm - low_pllm) * consider_data->input;
1047 					frem /= stride;
1048 				}
1049 				estimate_pllfm = (uint32_t) ((frem * input_inverse64) >> 32U);
1050 				/*
1051 				 * Calculate our pllfm estimate as discussed
1052 				 * above. Pass it off to our fractional
1053 				 * consider function.
1054 				 */
1055 				ti_pll_consider_fractional(consider_data, low_pllm,
1056 							   (uint32_t) frem,
1057 							   estimate_pllfm, stride);
1058 			}
1059 
1060 			if ((low_pllm != 0U) && (low_pllm == high_pllm)) {
1061 				/*
1062 				 * If we produce the exact frequency we want,
1063 				 * and ideal_pllm is a valid pllm value, then
1064 				 * low_pllm will be the same as high_pllm. In
1065 				 * this case move high_pllm to the next bin
1066 				 * if one exists to avoid duplicating
1067 				 * calculations.
1068 				 */
1069 				if (data->bin_next_pllm != NULL) {
1070 					high_pllm = data->bin_next_pllm(clkp,
1071 									consider_data->curr_plld,
1072 									high_pllm,
1073 									consider_data->curr_clkod);
1074 				} else {
1075 					high_pllm = 0U;
1076 				}
1077 			}
1078 
1079 			while (low_pllm != 0U) {
1080 				/*
1081 				 * This value will be at or below our target
1082 				 * frequency.
1083 				 */
1084 				ret = ti_pll_consider(consider_data, low_pllm, 0U,
1085 						      0U);
1086 
1087 				/*
1088 				 * Keep walking down to the next bin until we
1089 				 * run out of bins or get a pllm value that
1090 				 * is too low.
1091 				 */
1092 				if ((ret != PLLM_LOW) && (data->bin_prev_pllm != NULL)) {
1093 					low_pllm = data->bin_prev_pllm(clkp,
1094 								       consider_data->curr_plld,
1095 								       low_pllm,
1096 								       consider_data->curr_clkod);
1097 				} else {
1098 					low_pllm = 0U;
1099 				}
1100 			}
1101 
1102 			while (high_pllm != 0U) {
1103 				/*
1104 				 * This value will be above our target
1105 				 * frequency.
1106 				 */
1107 				ret = ti_pll_consider(consider_data, high_pllm, 0U, 0U);
1108 
1109 				/*
1110 				 * Keep walking up to the next bin until we
1111 				 * run out of bins or get a pllm value that
1112 				 * is too high.
1113 				 */
1114 				if ((ret != PLLM_HIGH) && (data->bin_next_pllm != NULL)) {
1115 					high_pllm = data->bin_next_pllm(
1116 									clkp,
1117 									consider_data->curr_plld,
1118 									high_pllm,
1119 									consider_data->curr_clkod);
1120 				} else {
1121 					high_pllm = 0U;
1122 				}
1123 			}
1124 		}
1125 	}
1126 }
1127 
ti_pll_calc(struct ti_clk * clkp,const struct ti_pll_data * data,uint32_t input,uint32_t output,uint32_t min,uint32_t max,uint32_t * const plld,uint32_t * const pllm,uint32_t * const pllfm,uint32_t * const clkod)1128 uint32_t ti_pll_calc(struct ti_clk *clkp, const struct ti_pll_data *data,
1129 		     uint32_t input, uint32_t output, uint32_t min, uint32_t max,
1130 		     uint32_t * const plld, uint32_t * const pllm, uint32_t * const pllfm,
1131 		     uint32_t * const clkod)
1132 {
1133 	assert(clkp != NULL);
1134 	assert(data != NULL);
1135 	assert(plld != NULL);
1136 	assert(pllm != NULL);
1137 	assert(pllfm != NULL);
1138 	assert(clkod != NULL);
1139 
1140 	if ((input != 0U) && (output != 0U)) {
1141 		const struct ti_clk_data_pll *pll = ti_container_of(clkp->data,
1142 								    struct ti_clk_data_pll,
1143 								    data);
1144 		struct ti_pll_consider_data consider_data = {
1145 			.clk		= clkp,
1146 			.data		= data,
1147 			.pll		= pll,
1148 			.vco		= ti_clk_get_range(pll->vco_range_idx),
1149 			.vco_in		= ti_clk_get_range(pll->vco_in_range_idx),
1150 			.input		= input,
1151 			.output		= output,
1152 			.min		= min,
1153 			.max		= max,
1154 
1155 			.min_delta	= UINT_MAX,
1156 			.max_fitness	= 0U,
1157 			.max_bin	= 0,
1158 			.best_actual	= 0U,
1159 			.plld		= plld,
1160 			.pllm		= pllm,
1161 			.pllfm		= pllfm,
1162 			.clkod		= clkod,
1163 		};
1164 
1165 		/* Validate range pointers */
1166 		if ((consider_data.vco == NULL) || (consider_data.vco_in == NULL)) {
1167 			return 0U;
1168 		}
1169 
1170 		/* Check tables */
1171 		if ((consider_data.pll->pll_entries) != NULL) {
1172 			size_t i;
1173 
1174 			for (i = 0; i < consider_data.pll->pll_entries_count; i++) {
1175 				ti_pll_consider_entry(&consider_data,
1176 						      &ti_soc_pll_table
1177 						      [consider_data.pll->pll_entries[i]]);
1178 				if (consider_data.min_delta == 0U) {
1179 					/* Exact match found, done */
1180 					break;
1181 				}
1182 			}
1183 		}
1184 
1185 		/* No table match found, perform calculation */
1186 		if (consider_data.min_delta == UINT_MAX) {
1187 			ti_pll_internal_calc(&consider_data);
1188 		}
1189 
1190 		/*
1191 		 * When returning frequencies, used the rounded down value. This
1192 		 * makes sure that if freq=ret, min=ret, max=ret is passed as
1193 		 * arguments, we always succeed and calculate the same parameters
1194 		 * as before.
1195 		 */
1196 		return consider_data.best_actual;
1197 	}
1198 
1199 	return 0U;
1200 }
1201 
ti_pll_init(struct ti_clk * clkp)1202 int32_t ti_pll_init(struct ti_clk *clkp)
1203 {
1204 	const struct ti_clk_drv *drv;
1205 	const struct ti_clk_data_pll *data_pll;
1206 	uint32_t freq;
1207 
1208 	assert(clkp != NULL);
1209 
1210 	drv = clkp->drv;
1211 	data_pll = ti_container_of(clkp->data,
1212 				   const struct ti_clk_data_pll, data);
1213 
1214 	if (data_pll->default_freq_idx != 0U) {
1215 		const struct ti_clk_default *dflt;
1216 		bool changed;
1217 
1218 		/* Validate index bounds */
1219 		if (data_pll->default_freq_idx >= soc_clock_freq_defaults_count) {
1220 			return -EINVAL;
1221 		}
1222 
1223 		/* Lookup entry */
1224 		dflt = &soc_clock_freq_defaults[data_pll->default_freq_idx];
1225 
1226 		/* Attempt to set default frequency */
1227 		freq = drv->set_freq(clkp, dflt->target_hz, dflt->min_hz,
1228 				     dflt->max_hz, &changed);
1229 
1230 		/* set_freq returns 0 if default frequency is not set */
1231 		if (freq == 0U) {
1232 			return -EINVAL;
1233 		}
1234 	}
1235 
1236 	/*
1237 	 * We must always assume we are enabled as we could be operating
1238 	 * clocks in bypass.
1239 	 */
1240 	clkp->flags |= TI_CLK_FLAG_PWR_UP_EN;
1241 
1242 	return 0;
1243 }
1244