xref: /haiku/src/add-ons/accelerants/intel_extreme/pll.cpp (revision 4c8e85b316c35a9161f5a1c50ad70bc91c83a76f)
1 /*
2  * Copyright 2006-2018, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  *		Alexander von Gluck IV, kallisti5@unixzen.com
8  *		Adrien Destugues, pulkomandy@pulkomandy.tk
9  */
10 
11 
12 #include "pll.h"
13 
14 #include <math.h>
15 #include <stdio.h>
16 #include <string.h>
17 
18 #include <Debug.h>
19 
20 #include <create_display_modes.h>
21 #include <ddc.h>
22 #include <edid.h>
23 #include <validate_display_mode.h>
24 
25 #include "accelerant_protos.h"
26 #include "accelerant.h"
27 #include "utility.h"
28 
29 
// Logging helpers for this file. Any TRACE definition inherited from the
// included headers is dropped first; TRACE_MODE is defined unconditionally
// right below, so tracing is always compiled in here.
#undef TRACE
#define TRACE_MODE
#ifdef TRACE_MODE
	// Prints the message through _sPrintf() with an "intel_extreme: " prefix.
#	define TRACE(x...) _sPrintf("intel_extreme: " x)
#else
	// Tracing disabled: TRACE() expands to nothing.
#	define TRACE(x...)
#endif

// ERROR() is always active, independent of TRACE_MODE.
#define ERROR(x...) _sPrintf("intel_extreme: " x)
// Traces the name of the enclosing function; its arguments are ignored.
#define CALLED(x...) TRACE("CALLED %s\n", __PRETTY_FUNCTION__)
40 
41 
42 // PLL limits, taken from i915 DRM driver. However, note that we use the values of
43 // N+2, M1+2 and M2+2 here, the - 2 being applied when we write the values to the registers.
44 
// Divisor limits that compute_dpll_9xx() selects for INTEL_GROUP_85x devices,
// for LVDS and non-LVDS outputs alike.
static pll_limits kLimits85x = {
	// p, p1, p2,  n,   m, m1, m2
	{  4,  2,  2,  4,  96, 20,  8},	// min
	{128, 33,  4, 18, 140, 28, 18},	// max
	// presumably dot_limit, min_vco, max_vco - confirm against pll.h
	165000, 908000, 1512000
};
51 
52 // For Iron Lake, a new set of timings is introduced along with the FDI system,
53 // and carried on to later cards with just one further change (to the P2 cutoff
54 // frequency) in Sandy Bridge.
55 
// Divisor limits that compute_dpll_9xx() selects for non-LVDS outputs on
// devices that are neither in the Pineview nor in the 85x group.
static pll_limits kLimits9xxSdvo = {
	// p, p1, p2,  n,   m, m1, m2
	{  5,  1,  5,  3,  70, 10,  5},	// min
	{ 80,  8, 10,  8, 120, 20,  9},	// max
	// presumably dot_limit, min_vco, max_vco - confirm against pll.h
	200000, 1400000, 2800000
};
62 
// Divisor limits that compute_dpll_9xx() selects for LVDS outputs on devices
// that are neither in the Pineview nor in the 85x group.
static pll_limits kLimits9xxLvds = {
	// p, p1, p2,  n,   m, m1, m2
	{  7,  1,  7,  3,  70, 10,  5},	// min
	{ 98,  8, 14,  8, 120, 20,  9},	// max
	// presumably dot_limit, min_vco, max_vco - confirm against pll.h
	112000, 1400000, 2800000
};
69 
70 // Limits for G45 cards taken from i915 DRM driver, mixed with old setup
// plus tests to accommodate lower resolutions with still correct refresh.
72 // Note that n here is actually n+2, same applies to m1 and m2.
73 
// Divisor limits that compute_dpll_g4x() selects for non-LVDS outputs on
// INTEL_GROUP_G4x devices. Both p2 bounds are 10, so compute_pll_p2() always
// yields p2 = 10 with this table.
static pll_limits kLimitsG4xSdvo = {
	// p, p1, p2,  n,   m, m1, m2
	{ 10,  1, 10,  3, 104, 19,  7},	// min
	{ 80,  8, 10,  8, 138, 25, 13},	// max
	// presumably dot_limit, min_vco, max_vco - confirm against pll.h
	270000, 1750000, 3500000
};
80 
// Compiled out: the only reference to this table is the commented-out
// INTEL_PORT_TYPE_HDMI branch in compute_dpll_g4x().
#if 0
static pll_limits kLimitsG4xHdmi = {
	// p, p1, p2,  n,   m, m1, m2
	{  5,  1,  5,  3, 104, 18,  7},	// min
	{ 80,  8, 10,  8, 138, 25, 13},	// max
	165000, 1750000, 3500000
};
#endif
89 
// Divisor limits that compute_dpll_g4x() selects on INTEL_GROUP_G4x devices
// for LVDS outputs when lvds_dual_link() reports single link.
static pll_limits kLimitsG4xLvdsSingle = {
	// p, p1, p2,  n,   m, m1, m2
	{ 28,  2, 14,  3, 104, 19,  7},	// min
	{112,  8, 14,  8, 138, 25, 13},	// max
	// presumably dot_limit, min_vco, max_vco - confirm against pll.h
	0, 1750000, 3500000
};
96 
// Divisor limits that compute_dpll_g4x() selects on INTEL_GROUP_G4x devices
// for LVDS outputs when lvds_dual_link() reports dual link.
static pll_limits kLimitsG4xLvdsDual = {
	// p, p1, p2,  n,   m, m1, m2
	{ 14,  2,  7,  3, 104, 19,  7},	// min
	{ 42,  6,  7,  8, 138, 25, 13},	// max
	// presumably dot_limit, min_vco, max_vco - confirm against pll.h
	0, 1750000, 3500000
};
103 
// Divisor limits that compute_dpll_g4x() selects for non-LVDS outputs on
// devices outside the G4x group (the PCH based path).
static pll_limits kLimitsIlkDac = {
	// p, p1, p2, n,   m, m1, m2
	{  5,  1,  5, 3,  79, 14,  7}, // min
	{ 80,  8, 10, 7, 127, 24, 11}, // max
	// presumably dot_limit, min_vco, max_vco - confirm against pll.h
	225000, 1760000, 3510000
};
110 
// Divisor limits that compute_dpll_g4x() selects on the PCH based path for
// single link LVDS when the reference clock is not 100 MHz.
static pll_limits kLimitsIlkLvdsSingle = {
	// p, p1, p2, n,   m, m1, m2
	{ 28,  2, 14, 3,  79, 14,  7}, // min
	{112,  8, 14, 5, 118, 24, 11}, // max
	// presumably dot_limit, min_vco, max_vco - confirm against pll.h
	225000, 1760000, 3510000
};
117 
// Divisor limits that compute_dpll_g4x() selects on the PCH based path for
// dual link LVDS when the reference clock is not 100 MHz.
static pll_limits kLimitsIlkLvdsDual = {
	// p, p1, p2, n,   m, m1, m2
	{ 14,  2,  7, 3,  79, 14,  7}, // min
	{ 56,  8,  7, 5, 127, 24, 11}, // max
	// presumably dot_limit, min_vco, max_vco - confirm against pll.h
	225000, 1760000, 3510000
};
124 
// Variants for a 100 MHz reference clock
// Divisor limits that compute_dpll_g4x() selects on the PCH based path for
// single link LVDS when the reference clock equals 100 MHz.
static pll_limits kLimitsIlkLvdsSingle100 = {
	// p, p1, p2, n,   m, m1, m2
	{ 28,  2, 14, 3,  79, 14,  7}, // min
	{112,  8, 14, 4, 126, 24, 11}, // max
	// presumably dot_limit, min_vco, max_vco - confirm against pll.h
	225000, 1760000, 3510000
};
132 
// Divisor limits that compute_dpll_g4x() selects on the PCH based path for
// dual link LVDS when the reference clock equals 100 MHz.
static pll_limits kLimitsIlkLvdsDual100 = {
	// p, p1, p2, n,   m, m1, m2
	{ 14,  2,  7, 3,  79, 14,  7}, // min
	{ 42,  6,  7, 5, 126, 24, 11}, // max
	// presumably dot_limit, min_vco, max_vco - confirm against pll.h
	225000, 1760000, 3510000
};
139 
140 // TODO From haswell onwards, a completely different PLL design is used
141 // (intel_gfx-prm-osrc-hsw-display_0.pdf, page 268 for VGA). It uses a "virtual
142 // root frequency" and one just has to set a single divider (integer and
143 // fractional parts), so it makes no sense to reuse the same code and limit
144 // structures there.
145 //
146 // For other display connections, the clock is handled differently, as there is
147 // no need for a precise timing to send things in sync with the display.
// Compiled out: nothing in this file references these three tables.
// compute_pll_divisors() only logs a TODO for the CHV and VLV groups.
#if 0
// NOTE(review): the m2 bounds are shifted left by 22 here, presumably a
// fixed point encoding - confirm before enabling.
static pll_limits kLimitsChv = {
	// p, p1, p2, n,   m, m1, m2
	{  0,  2,  1, 1,  79, 2,   24 << 22}, // min
	{  0,  4, 14, 1, 127, 2,  175 << 22}, // max
	0, 4800000, 6480000
};

static pll_limits kLimitsVlv = {
	// p, p1, p2, n,   m, m1, m2
	{  0,  2,  2, 1,  79, 2,   11},	// min
	{  0,  3, 20, 7, 127, 3,  156},	// max
	0, 4000000, 6000000
};

// NOTE(review): same shifted m2 encoding as kLimitsChv; m is 0 on both rows.
static pll_limits kLimitsBxt = {
	// p, p1, p2, n,  m, m1, m2
	{  0,  2,  1, 1,  0,  2,   2 << 22}, // min
	{  0,  4, 20, 1,  0,  2, 255 << 22}, // max
	0, 4800000, 6700000
};
#endif
170 
// Divisor limits that compute_dpll_9xx() selects for non-LVDS outputs on
// INTEL_GROUP_PIN (Pineview) devices. m1 is 0 on both rows; compute_pll_m()
// treats m1 as reserved on Pineview and uses m2 alone.
static pll_limits kLimitsPinSdvo = {
	// p, p1, p2, n,   m, m1,  m2
	{  5,  1,  5, 3,   2,  0,   0},	// min
	{ 80,  8, 10, 6, 256,  0, 254},	// max
	// presumably dot_limit, min_vco, max_vco - confirm against pll.h
	200000, 1700000, 3500000
};
177 
// Divisor limits that compute_dpll_9xx() selects for LVDS outputs on
// INTEL_GROUP_PIN (Pineview) devices. m1 is 0 on both rows; compute_pll_m()
// treats m1 as reserved on Pineview and uses m2 alone.
static pll_limits kLimitsPinLvds = {
	// p, p1, p2, n,   m, m1,  m2
	{  7,  1, 14, 3,   2,  0,   0},	// min
	{112,  8, 14, 6, 256,  0, 254},	// max
	// presumably dot_limit, min_vco, max_vco - confirm against pll.h
	112000, 1700000, 3500000
};
184 
185 
186 static bool
187 lvds_dual_link(display_timing* current)
188 {
189 	float requestedPixelClock = current->pixel_clock / 1000.0f;
190 	if (requestedPixelClock > 112.999)
191 		return true;
192 
193 	// TODO: Force dual link on MacBookPro6,2  MacBookPro8,2  MacBookPro9,1
194 
195 	return ((read32(INTEL_DIGITAL_LVDS_PORT) & LVDS_CLKB_POWER_MASK)
196 		== LVDS_CLKB_POWER_UP);
197 }
198 
199 
200 bool
201 valid_pll_divisors(pll_divisors* divisors, pll_limits* limits)
202 {
203 	pll_info &info = gInfo->shared_info->pll_info;
204 	uint32 vco = info.reference_frequency * divisors->m / divisors->n;
205 	uint32 frequency = vco / divisors->p;
206 
207 	if (divisors->p < limits->min.p || divisors->p > limits->max.p
208 		|| divisors->m < limits->min.m || divisors->m > limits->max.m
209 		|| vco < limits->min_vco || vco > limits->max_vco
210 		|| frequency < info.min_frequency || frequency > info.max_frequency)
211 		return false;
212 
213 	return true;
214 }
215 
216 
217 static void
218 compute_pll_p2(display_timing* current, pll_divisors* divisors,
219 	pll_limits* limits, bool isLVDS)
220 {
221 	if (isLVDS) {
222 		if (lvds_dual_link(current)) {
223 			// fast DAC timing via 2 channels (dual link LVDS)
224 			divisors->p2 = limits->min.p2;
225 		} else {
226 			// slow DAC timing
227 			divisors->p2 = limits->max.p2;
228 		}
229 	} else {
230 		if (current->pixel_clock < limits->dot_limit) {
231 			// slow DAC timing
232 			divisors->p2 = limits->max.p2;
233 		} else {
234 			// fast DAC timing
235 			divisors->p2 = limits->min.p2;
236 		}
237 	}
238 }
239 
240 
241 // TODO we can simplify this computation, with the way the dividers are set, we
242 // know that all values in the valid range for M are reachable. M1 allows to
243 // generate any multiple of 5 in the range and M2 allows to reach the 4 next
244 // values. Therefore, we don't need to loop over the range of values for M1 and
245 // M2 separately, we could instead just loop over possible values for M.
246 // For this to work, the logic of this function must be reversed: for a given M,
247 // it should give the resulting M1 and M2 values for programming the registers.
248 static uint32
249 compute_pll_m(pll_divisors* divisors)
250 {
251 	if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_CHV)
252 		|| gInfo->shared_info->device_type.InGroup(INTEL_GROUP_VLV)) {
253 		return divisors->m1 * divisors->m2;
254 	}
255 
256 	// Pineview, m1 is reserved
257 	if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_PIN))
258 		return divisors->m2;
259 
260 	return 5 * divisors->m1 + divisors->m2;
261 }
262 
263 
264 static uint32
265 compute_pll_p(pll_divisors* divisors)
266 {
267 	return divisors->p1 * divisors->p2;
268 }
269 
270 
271 static void
272 compute_dpll_g4x(display_timing* current, pll_divisors* divisors, bool isLVDS)
273 {
274 	float requestedPixelClock = current->pixel_clock / 1000.0f;
275 	float referenceClock
276 		= gInfo->shared_info->pll_info.reference_frequency / 1000.0f;
277 
278 	TRACE("%s: required MHz: %g, reference clock: %g\n", __func__,
279 		requestedPixelClock, referenceClock);
280 
281 	pll_limits limits;
282 	if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_G4x)) {
283 		// TODO: Pass port type via video_configuration
284 		if (isLVDS) {
285 			if (lvds_dual_link(current))
286 				memcpy(&limits, &kLimitsG4xLvdsDual, sizeof(pll_limits));
287 			else
288 				memcpy(&limits, &kLimitsG4xLvdsSingle, sizeof(pll_limits));
289 		//} else if (type == INTEL_PORT_TYPE_HDMI) {
290 		//	memcpy(&limits, &kLimitsG4xHdmi, sizeof(pll_limits));
291 		} else
292 			memcpy(&limits, &kLimitsG4xSdvo, sizeof(pll_limits));
293 	} else {
294 		// There must be a PCH, so this is ivy bridge or later
295 		if (isLVDS) {
296 			if (lvds_dual_link(current)) {
297 				if (referenceClock == 100.0)
298 					memcpy(&limits, &kLimitsIlkLvdsDual100, sizeof(pll_limits));
299 				else
300 					memcpy(&limits, &kLimitsIlkLvdsDual, sizeof(pll_limits));
301 			} else {
302 				if (referenceClock == 100.0) {
303 					memcpy(&limits, &kLimitsIlkLvdsSingle100,
304 						sizeof(pll_limits));
305 				} else {
306 					memcpy(&limits, &kLimitsIlkLvdsSingle, sizeof(pll_limits));
307 				}
308 			}
309 		} else {
310 			memcpy(&limits, &kLimitsIlkDac, sizeof(pll_limits));
311 		}
312 	}
313 
314 	compute_pll_p2(current, divisors, &limits, isLVDS);
315 
316 	TRACE("PLL limits, min: p %" B_PRId32 " (p1 %" B_PRId32 ", "
317 		"p2 %" B_PRId32 "), n %" B_PRId32 ", m %" B_PRId32 " "
318 		"(m1 %" B_PRId32 ", m2 %" B_PRId32 ")\n", limits.min.p,
319 		limits.min.p1, limits.min.p2, limits.min.n, limits.min.m,
320 		limits.min.m1, limits.min.m2);
321 	TRACE("PLL limits, max: p %" B_PRId32 " (p1 %" B_PRId32 ", "
322 		"p2 %" B_PRId32 "), n %" B_PRId32 ", m %" B_PRId32 " "
323 		"(m1 %" B_PRId32 ", m2 %" B_PRId32 ")\n", limits.max.p,
324 		limits.max.p1, limits.max.p2, limits.max.n, limits.max.m,
325 		limits.max.m1, limits.max.m2);
326 
327 	float best = requestedPixelClock;
328 	pll_divisors bestDivisors;
329 
330 	for (divisors->n = limits.min.n; divisors->n <= limits.max.n;
331 			divisors->n++) {
332 		for (divisors->m1 = limits.max.m1; divisors->m1 >= limits.min.m1;
333 				divisors->m1--) {
334 			for (divisors->m2 = limits.max.m2; divisors->m2 >= limits.min.m2;
335 					divisors->m2--) {
336 				for (divisors->p1 = limits.max.p1;
337 						divisors->p1 >= limits.min.p1; divisors->p1--) {
338 					divisors->m = compute_pll_m(divisors);
339 					divisors->p = compute_pll_p(divisors);
340 
341 					if (!valid_pll_divisors(divisors, &limits))
342 						continue;
343 
344 					float error = fabs(requestedPixelClock
345 						- (referenceClock * divisors->m)
346 						/ (divisors->n * divisors->p));
347 					if (error < best) {
348 						best = error;
349 						bestDivisors = *divisors;
350 
351 						if (error == 0)
352 							break;
353 					}
354 				}
355 			}
356 		}
357 	}
358 	*divisors = bestDivisors;
359 	TRACE("%s: best MHz: %g (error: %g)\n", __func__,
360 		(referenceClock * divisors->m) / (divisors->n * divisors->p),
361 		best);
362 }
363 
364 
365 static void
366 compute_dpll_9xx(display_timing* current, pll_divisors* divisors, bool isLVDS)
367 {
368 	float requestedPixelClock = current->pixel_clock / 1000.0f;
369 	float referenceClock
370 		= gInfo->shared_info->pll_info.reference_frequency / 1000.0f;
371 
372 	TRACE("%s: required MHz: %g\n", __func__, requestedPixelClock);
373 
374 	pll_limits limits;
375 	if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_PIN)) {
376 		if (isLVDS)
377 			memcpy(&limits, &kLimitsPinLvds, sizeof(pll_limits));
378 		else
379 			memcpy(&limits, &kLimitsPinSdvo, sizeof(pll_limits));
380 	} else if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_85x)) {
381 		memcpy(&limits, &kLimits85x, sizeof(pll_limits));
382 	} else {
383 		if (isLVDS)
384 			memcpy(&limits, &kLimits9xxLvds, sizeof(pll_limits));
385 		else
386 			memcpy(&limits, &kLimits9xxSdvo, sizeof(pll_limits));
387 	}
388 
389 	compute_pll_p2(current, divisors, &limits, isLVDS);
390 
391 	TRACE("PLL limits, min: p %" B_PRId32 " (p1 %" B_PRId32 ", "
392 		"p2 %" B_PRId32 "), n %" B_PRId32 ", m %" B_PRId32 " "
393 		"(m1 %" B_PRId32 ", m2 %" B_PRId32 ")\n", limits.min.p,
394 		limits.min.p1, limits.min.p2, limits.min.n, limits.min.m,
395 		limits.min.m1, limits.min.m2);
396 	TRACE("PLL limits, max: p %" B_PRId32 " (p1 %" B_PRId32 ", "
397 		"p2 %" B_PRId32 "), n %" B_PRId32 ", m %" B_PRId32 " "
398 		"(m1 %" B_PRId32 ", m2 %" B_PRId32 ")\n", limits.max.p,
399 		limits.max.p1, limits.max.p2, limits.max.n, limits.max.m,
400 		limits.max.m1, limits.max.m2);
401 
402 	bool is_pine = gInfo->shared_info->device_type.InGroup(INTEL_GROUP_PIN);
403 
404 	float best = requestedPixelClock;
405 	pll_divisors bestDivisors;
406 	memset(&bestDivisors, 0, sizeof(bestDivisors));
407 
408 	for (divisors->m1 = limits.min.m1; divisors->m1 <= limits.max.m1;
409 			divisors->m1++) {
410 		for (divisors->m2 = limits.min.m2; divisors->m2 <= limits.max.m2
411 				&& ((divisors->m2 < divisors->m1) || is_pine); divisors->m2++) {
412 			for (divisors->n = limits.min.n; divisors->n <= limits.max.n;
413 					divisors->n++) {
414 				for (divisors->p1 = limits.min.p1;
415 						divisors->p1 <= limits.max.p1; divisors->p1++) {
416 					divisors->m = compute_pll_m(divisors);
417 					divisors->p = compute_pll_p(divisors);
418 
419 					if (!valid_pll_divisors(divisors, &limits))
420 						continue;
421 
422 					float error = fabs(requestedPixelClock
423 						- (referenceClock * divisors->m)
424 						/ (divisors->n * divisors->p));
425 					if (error < best) {
426 						best = error;
427 						bestDivisors = *divisors;
428 
429 						if (error == 0)
430 							break;
431 					}
432 				}
433 			}
434 		}
435 	}
436 
437 	*divisors = bestDivisors;
438 
439 	if (best == requestedPixelClock)
440 		debugger("No valid PLL configuration found");
441 	else {
442 		TRACE("%s: best MHz: %g (error: %g)\n", __func__,
443 			(referenceClock * divisors->m) / (divisors->n * divisors->p),
444 			best);
445 	}
446 }
447 
448 
449 void
450 compute_pll_divisors(display_timing* current, pll_divisors* divisors, bool isLVDS)
451 {
452 	if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_G4x)
453 		|| (gInfo->shared_info->pch_info != INTEL_PCH_NONE)) {
454 		compute_dpll_g4x(current, divisors, isLVDS);
455 	} else if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_CHV)) {
456 		ERROR("%s: TODO: CherryView\n", __func__);
457 	} else if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_VLV)) {
458 		ERROR("%s: TODO: VallyView\n", __func__);
459 	} else
460 		compute_dpll_9xx(current, divisors, isLVDS);
461 
462 	TRACE("%s: found: p = %" B_PRId32 " (p1 = %" B_PRId32 ", "
463 		"p2 = %" B_PRId32 "), n = %" B_PRId32 ", m = %" B_PRId32 " "
464 		"(m1 = %" B_PRId32 ", m2 = %" B_PRId32 ")\n", __func__,
465 		divisors->p, divisors->p1, divisors->p2, divisors->n,
466 		divisors->m, divisors->m1, divisors->m2);
467 }
468 
469 
470 void
471 refclk_activate_ilk(bool hasPanel)
472 {
473 	CALLED();
474 
475 	// aka, our engineers hate you
476 
477 	bool wantsSSC;
478 	bool hasCK505;
479 	if (gInfo->shared_info->pch_info == INTEL_PCH_IBX) {
480 		//XXX: This should be == vbt display_clock_mode
481 		hasCK505 = false;
482 		wantsSSC = hasCK505;
483 	} else {
484 		hasCK505 = false;
485 		wantsSSC = true;
486 	}
487 
488 	uint32 clkRef = read32(PCH_DREF_CONTROL);
489 	uint32 newRef = clkRef;
490 
491 	newRef &= ~DREF_NONSPREAD_SOURCE_MASK;
492 
493 	if (hasCK505)
494 		newRef |= DREF_NONSPREAD_CK505_ENABLE;
495 	else
496 		newRef |= DREF_NONSPREAD_SOURCE_ENABLE;
497 
498 	newRef &= ~DREF_SSC_SOURCE_MASK;
499 	newRef &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
500 	newRef &= ~DREF_SSC1_ENABLE;
501 
502 	if (newRef == clkRef) {
503 		TRACE("%s: No changes to reference clock.\n", __func__);
504 		return;
505 	}
506 
507 	if (hasPanel) {
508 		newRef &= ~DREF_SSC_SOURCE_MASK;
509 		newRef |= DREF_SSC_SOURCE_ENABLE;
510 
511 		if (wantsSSC)
512 			newRef |= DREF_SSC1_ENABLE;
513 		else
514 			newRef &= ~DREF_SSC1_ENABLE;
515 
516 		// Power up SSC before enabling outputs
517 		write32(PCH_DREF_CONTROL, newRef);
518 		read32(PCH_DREF_CONTROL);
519 		spin(200);
520 
521 		newRef &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
522 
523 		bool hasEDP = true;
524 		if (hasEDP) {
525 			if (wantsSSC)
526 				newRef |= DREF_CPU_SOURCE_OUTPUT_DOWNSPREAD;
527 			else
528 				newRef |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD;
529 		} else
530 			newRef |= DREF_CPU_SOURCE_OUTPUT_DISABLE;
531 
532 		write32(PCH_DREF_CONTROL, newRef);
533 		read32(PCH_DREF_CONTROL);
534 		spin(200);
535 	} else {
536 		newRef &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
537 		newRef |= DREF_CPU_SOURCE_OUTPUT_DISABLE;
538 
539 		write32(PCH_DREF_CONTROL, newRef);
540 		read32(PCH_DREF_CONTROL);
541 		spin(200);
542 
543 		if (!wantsSSC) {
544 			newRef &= ~DREF_SSC_SOURCE_MASK;
545 			newRef |= DREF_SSC_SOURCE_DISABLE;
546 			newRef &= ~DREF_SSC1_ENABLE;
547 
548 			write32(PCH_DREF_CONTROL, newRef);
549 			read32(PCH_DREF_CONTROL);
550 			spin(200);
551 		}
552 	}
553 }
554 
555 
556 //excerpt (plus modifications) from intel_dpll_mgr.c:
557 
558 /*
559  * Copyright © 2006-2016 Intel Corporation
560  *
561  * Permission is hereby granted, free of charge, to any person obtaining a
562  * copy of this software and associated documentation files (the "Software"),
563  * to deal in the Software without restriction, including without limitation
564  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
565  * and/or sell copies of the Software, and to permit persons to whom the
566  * Software is furnished to do so, subject to the following conditions:
567  *
568  * The above copyright notice and this permission notice (including the next
569  * paragraph) shall be included in all copies or substantial portions of the
570  * Software.
571  *
572  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
573  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
574  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
575  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
576  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
577  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
578  * DEALINGS IN THE SOFTWARE.
579  */
580 
/* LC PLL frequency (presumably in MHz - confirm against the hardware docs) */
#define LC_FREQ 2700
/* LC_FREQ scaled by 2000, to be compared against the "freq2k" values */
#define LC_FREQ_2K (uint64)(LC_FREQ * 2000)

/* Range and step of the P divider tried by hsw_ddi_calculate_wrpll() */
#define P_MIN 2
#define P_MAX 64
#define P_INC 2

/* Constraints for PLL good behavior */
/* Allowed range for Ref = LC_FREQ / R, same unit as LC_FREQ */
#define REF_MIN 48
#define REF_MAX 400
/* Allowed range for VCO = N * Ref, same unit as LC_FREQ */
#define VCO_MIN 2400
#define VCO_MAX 4800
593 
594 static uint64 AbsSubtr64(uint64 nr1, uint64 nr2)
595 {
596 	if (nr1 >= nr2) {
597 		return nr1 - nr2;
598 	} else {
599 		return nr2 - nr1;
600 	}
601 }
602 
/*
 * One (r, n, p) candidate for the Haswell WRPLL. p == 0 marks "no candidate
 * stored yet" for hsw_wrpll_update_rnp().
 */
struct hsw_wrpll_rnp {
	unsigned p;	/* P divider */
	unsigned n2;	/* N2, i.e. 2 * N */
	unsigned r2;	/* R2, i.e. 2 * R */
};
606 
/*
 * Returns the allowed frequency deviation ("budget", in PPM as used by
 * hsw_wrpll_update_rnp()) for the given clock in Hz. A fixed set of well
 * known clocks gets a dedicated budget; every other clock gets 1000.
 */
static unsigned hsw_wrpll_get_budget_for_freq(int clock)
{
	switch (clock) {
	case 25175000:
	case 25200000:
	case 27000000:
	case 27027000:
	case 37762500:
	case 37800000:
	case 40500000:
	case 40541000:
	case 54000000:
	case 54054000:
	case 59341000:
	case 59400000:
	case 72000000:
	case 74176000:
	case 74250000:
	case 81000000:
	case 81081000:
	case 89012000:
	case 89100000:
	case 108000000:
	case 108108000:
	case 111264000:
	case 111375000:
	case 148352000:
	case 148500000:
	case 162000000:
	case 162162000:
	case 222525000:
	case 222750000:
	case 296703000:
	case 297000000:
		/* these clocks have to be hit exactly */
		return 0;

	case 233500000:
	case 245250000:
	case 247750000:
	case 253250000:
	case 298000000:
		return 1500;

	case 169128000:
	case 169500000:
	case 179500000:
	case 202000000:
		return 2000;

	case 256250000:
	case 262500000:
	case 270000000:
	case 272500000:
	case 273750000:
	case 280750000:
	case 281250000:
	case 286000000:
	case 291750000:
		return 4000;

	case 267250000:
	case 268500000:
		return 5000;

	default:
		break;
	}

	return 1000;
}
680 
681 static void hsw_wrpll_update_rnp(uint64 freq2k, unsigned int budget,
682 				 unsigned int r2, unsigned int n2,
683 				 unsigned int p,
684 				 struct hsw_wrpll_rnp *best)
685 {
686 	uint64 a, b, c, d, diff, diff_best;
687 
688 	/* No best (r,n,p) yet */
689 	if (best->p == 0) {
690 		best->p = p;
691 		best->n2 = n2;
692 		best->r2 = r2;
693 		return;
694 	}
695 
696 	/*
697 	 * Output clock is (LC_FREQ_2K / 2000) * N / (P * R), which compares to
698 	 * freq2k.
699 	 *
700 	 * delta = 1e6 *
701 	 *	   abs(freq2k - (LC_FREQ_2K * n2/(p * r2))) /
702 	 *	   freq2k;
703 	 *
704 	 * and we would like delta <= budget.
705 	 *
706 	 * If the discrepancy is above the PPM-based budget, always prefer to
707 	 * improve upon the previous solution.  However, if you're within the
708 	 * budget, try to maximize Ref * VCO, that is N / (P * R^2).
709 	 */
710 	a = freq2k * budget * p * r2;
711 	b = freq2k * budget * best->p * best->r2;
712 	diff = AbsSubtr64((uint64)freq2k * p * r2, LC_FREQ_2K * n2);
713 	diff_best = AbsSubtr64((uint64)freq2k * best->p * best->r2,
714 			     LC_FREQ_2K * best->n2);
715 	c = 1000000 * diff;
716 	d = 1000000 * diff_best;
717 
718 	if (a < c && b < d) {
719 		/* If both are above the budget, pick the closer */
720 		if (best->p * best->r2 * diff < p * r2 * diff_best) {
721 			best->p = p;
722 			best->n2 = n2;
723 			best->r2 = r2;
724 		}
725 	} else if (a >= c && b < d) {
726 		/* If A is below the threshold but B is above it?  Update. */
727 		best->p = p;
728 		best->n2 = n2;
729 		best->r2 = r2;
730 	} else if (a >= c && b >= d) {
731 		/* Both are below the limit, so pick the higher n2/(r2*r2) */
732 		if (n2 * best->r2 * best->r2 > best->n2 * r2 * r2) {
733 			best->p = p;
734 			best->n2 = n2;
735 			best->r2 = r2;
736 		}
737 	}
738 	/* Otherwise a < c && b >= d, do nothing */
739 }
740 
741 void
742 hsw_ddi_calculate_wrpll(int clock /* in Hz */,
743 			unsigned *r2_out, unsigned *n2_out, unsigned *p_out)
744 {
745 	uint64 freq2k;
746 	unsigned p, n2, r2;
747 	struct hsw_wrpll_rnp best = { 0, 0, 0 };
748 	unsigned budget;
749 
750 	freq2k = clock / 100;
751 
752 	budget = hsw_wrpll_get_budget_for_freq(clock);
753 
754 	/* Special case handling for 540 pixel clock: bypass WR PLL entirely
755 	 * and directly pass the LC PLL to it. */
756 	if (freq2k == 5400000) {
757 		*n2_out = 2;
758 		*p_out = 1;
759 		*r2_out = 2;
760 		return;
761 	}
762 
763 	/*
764 	 * Ref = LC_FREQ / R, where Ref is the actual reference input seen by
765 	 * the WR PLL.
766 	 *
767 	 * We want R so that REF_MIN <= Ref <= REF_MAX.
768 	 * Injecting R2 = 2 * R gives:
769 	 *   REF_MAX * r2 > LC_FREQ * 2 and
770 	 *   REF_MIN * r2 < LC_FREQ * 2
771 	 *
772 	 * Which means the desired boundaries for r2 are:
773 	 *  LC_FREQ * 2 / REF_MAX < r2 < LC_FREQ * 2 / REF_MIN
774 	 *
775 	 */
776 	for (r2 = LC_FREQ * 2 / REF_MAX + 1;
777 	     r2 <= LC_FREQ * 2 / REF_MIN;
778 	     r2++) {
779 
780 		/*
781 		 * VCO = N * Ref, that is: VCO = N * LC_FREQ / R
782 		 *
783 		 * Once again we want VCO_MIN <= VCO <= VCO_MAX.
784 		 * Injecting R2 = 2 * R and N2 = 2 * N, we get:
785 		 *   VCO_MAX * r2 > n2 * LC_FREQ and
786 		 *   VCO_MIN * r2 < n2 * LC_FREQ)
787 		 *
788 		 * Which means the desired boundaries for n2 are:
789 		 * VCO_MIN * r2 / LC_FREQ < n2 < VCO_MAX * r2 / LC_FREQ
790 		 */
791 		for (n2 = VCO_MIN * r2 / LC_FREQ + 1;
792 		     n2 <= VCO_MAX * r2 / LC_FREQ;
793 		     n2++) {
794 
795 			for (p = P_MIN; p <= P_MAX; p += P_INC)
796 				hsw_wrpll_update_rnp(freq2k, budget,
797 						     r2, n2, p, &best);
798 		}
799 	}
800 
801 	*n2_out = best.n2;
802 	*p_out = best.p;
803 	*r2_out = best.r2;
804 }
805 
806 struct skl_wrpll_context {
807 	uint64 min_deviation;		/* current minimal deviation */
808 	uint64 central_freq;		/* chosen central freq */
809 	uint64 dco_freq;			/* chosen dco freq */
810 	unsigned int p;				/* chosen divider */
811 };
812 
/* DCO freq must be within +1%/-6%  of the DCO central freq */
/*
 * Both limits are in units of 0.01%, matching the deviation computed in
 * skl_wrpll_try_divider() (10000 * difference / central frequency). They are
 * exclusive: a deviation equal to the limit is rejected.
 */
#define SKL_DCO_MAX_PDEVIATION	100
#define SKL_DCO_MAX_NDEVIATION	600
816 
817 static void skl_wrpll_try_divider(struct skl_wrpll_context *ctx,
818 				  uint64 central_freq,
819 				  uint64 dco_freq,
820 				  unsigned int divider)
821 {
822 	uint64 deviation;
823 
824 	deviation = ((uint64)10000 * AbsSubtr64(dco_freq, central_freq)
825 			      / central_freq);
826 
827 	/* positive deviation */
828 	if (dco_freq >= central_freq) {
829 		if (deviation < SKL_DCO_MAX_PDEVIATION &&
830 		    deviation < ctx->min_deviation) {
831 			ctx->min_deviation = deviation;
832 			ctx->central_freq = central_freq;
833 			ctx->dco_freq = dco_freq;
834 			ctx->p = divider;
835 
836 			TRACE("%s: DCO central frequency %" B_PRIu64 "Hz\n", __func__, central_freq);
837 			TRACE("%s: DCO frequency %" B_PRIu64 "Hz\n", __func__, dco_freq);
838 			TRACE("%s: positive offset accepted, deviation %" B_PRIu64 "\n",
839 				__func__, deviation);
840 		}
841 	/* negative deviation */
842 	} else if (deviation < SKL_DCO_MAX_NDEVIATION &&
843 		   deviation < ctx->min_deviation) {
844 		ctx->min_deviation = deviation;
845 		ctx->central_freq = central_freq;
846 		ctx->dco_freq = dco_freq;
847 		ctx->p = divider;
848 
849 		TRACE("%s: DCO central frequency %" B_PRIu64 "Hz\n", __func__, central_freq);
850 		TRACE("%s: DCO frequency %" B_PRIu64 "Hz\n", __func__, dco_freq);
851 		TRACE("%s: negative offset accepted, deviation %" B_PRIu64 "\n",
852 			__func__, deviation);
853 	}
854 }
855 
/*
 * Splits the full divider p into three factors with p == p0 * p1 * p2.
 *
 * Odd dividers are only handled for 3, 5, 7, 9, 15, 21 and 35. Even dividers
 * are split based on p / 2. For any divider that is not covered, the outputs
 * are left untouched.
 */
static void skl_wrpll_get_multipliers(unsigned int p,
				      unsigned int *p0 /* out */,
				      unsigned int *p1 /* out */,
				      unsigned int *p2 /* out */)
{
	static const unsigned int kFactors[] = { 2, 3, 7 };
	unsigned int half;
	unsigned int i;

	if ((p & 1) != 0) {
		/* odd dividers: 3, 5, 7, 9, 15, 21, 35 */
		switch (p) {
		case 3:
		case 9:
			*p0 = 3;
			*p1 = 1;
			*p2 = p / 3;
			break;
		case 5:
		case 7:
			*p0 = p;
			*p1 = 1;
			*p2 = 1;
			break;
		case 15:
			*p0 = 3;
			*p1 = 1;
			*p2 = 5;
			break;
		case 21:
			*p0 = 7;
			*p1 = 1;
			*p2 = 3;
			break;
		case 35:
			*p0 = 7;
			*p1 = 1;
			*p2 = 5;
			break;
		default:
			break;
		}
		return;
	}

	/* even dividers */
	half = p / 2;

	if (half == 1 || half == 2 || half == 3 || half == 5) {
		*p0 = 2;
		*p1 = 1;
		*p2 = half;
		return;
	}

	/* otherwise p2 is 2 and p0 the first of 2, 3, 7 dividing half */
	for (i = 0; i < sizeof(kFactors) / sizeof(kFactors[0]); i++) {
		if (half % kFactors[i] == 0) {
			*p0 = kFactors[i];
			*p1 = half / kFactors[i];
			*p2 = 2;
			return;
		}
	}
}
904 
905 static void skl_wrpll_context_init(struct skl_wrpll_context *ctx)
906 {
907 	memset(ctx, 0, sizeof(*ctx));
908 	ctx->min_deviation = UINT64_MAX;
909 }
910 
911 static void skl_wrpll_params_populate(struct skl_wrpll_params *params,
912 				      uint64 afe_clock,
913 				      int ref_clock,
914 				      uint64 central_freq,
915 				      uint32 p0, uint32 p1, uint32 p2)
916 {
917 	uint64 dco_freq;
918 
919 	switch (central_freq) {
920 	case 9600000000ULL:
921 		params->central_freq = 0;
922 		break;
923 	case 9000000000ULL:
924 		params->central_freq = 1;
925 		break;
926 	case 8400000000ULL:
927 		params->central_freq = 3;
928 	}
929 
930 	switch (p0) {
931 	case 1:
932 		params->pdiv = 0;
933 		break;
934 	case 2:
935 		params->pdiv = 1;
936 		break;
937 	case 3:
938 		params->pdiv = 2;
939 		break;
940 	case 7:
941 		params->pdiv = 4;
942 		break;
943 	default:
944 		TRACE("%s: Incorrect PDiv\n", __func__);
945 	}
946 
947 	switch (p2) {
948 	case 5:
949 		params->kdiv = 0;
950 		break;
951 	case 2:
952 		params->kdiv = 1;
953 		break;
954 	case 3:
955 		params->kdiv = 2;
956 		break;
957 	case 1:
958 		params->kdiv = 3;
959 		break;
960 	default:
961 		TRACE("%s: Incorrect KDiv\n", __func__);
962 	}
963 
964 	params->qdiv_ratio = p1;
965 	params->qdiv_mode = (params->qdiv_ratio == 1) ? 0 : 1;
966 
967 	dco_freq = p0 * p1 * p2 * afe_clock;
968 	TRACE("%s: AFE frequency %" B_PRIu64 "Hz\n", __func__, afe_clock);
969 	TRACE("%s: p0: %" B_PRIu32 ", p1: %" B_PRIu32 ", p2: %" B_PRIu32 "\n",
970 		__func__, p0,p1,p2);
971 	TRACE("%s: DCO frequency %" B_PRIu64 "Hz\n", __func__, dco_freq);
972 
973 	/*
974 	 * Intermediate values are in Hz.
975 	 * Divide by MHz to match bsepc
976 	 */
977 	params->dco_integer = (uint64)dco_freq / ((uint64)ref_clock * 1000);
978 	params->dco_fraction = (
979 			(uint64)dco_freq / ((uint64)ref_clock / 1000) -
980 			(uint64)params->dco_integer * 1000000) * 0x8000 /
981 			1000000;
982 
983 	TRACE("%s: Reference clock: %gMhz\n", __func__, ref_clock / 1000.0f);
984 	TRACE("%s: DCO integer %" B_PRIu32 "\n", __func__, params->dco_integer);
985 	TRACE("%s: DCO fraction 0x%" B_PRIx32 "\n", __func__, params->dco_fraction);
986 }
987 
/*
 * Number of elements of an array. Only valid for actual arrays, not for
 * pointers (including array parameters, which are pointers).
 */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
989 
990 bool
991 skl_ddi_calculate_wrpll(int clock /* in Hz */,
992 			int ref_clock,
993 			struct skl_wrpll_params *wrpll_params)
994 {
995 	uint64 afe_clock = (uint64) clock * 5; /* AFE Clock is 5x Pixel clock */
996 	uint64 dco_central_freq[3] = { 8400000000ULL,
997 				    9000000000ULL,
998 				    9600000000ULL };
999 	static const int even_dividers[] = {  4,  6,  8, 10, 12, 14, 16, 18, 20,
1000 					     24, 28, 30, 32, 36, 40, 42, 44,
1001 					     48, 52, 54, 56, 60, 64, 66, 68,
1002 					     70, 72, 76, 78, 80, 84, 88, 90,
1003 					     92, 96, 98 };
1004 	static const int odd_dividers[] = { 3, 5, 7, 9, 15, 21, 35 };
1005 	static const struct {
1006 		const int *list;
1007 		unsigned int n_dividers;
1008 	} dividers[] = {
1009 		{ even_dividers, ARRAY_SIZE(even_dividers) },
1010 		{ odd_dividers, ARRAY_SIZE(odd_dividers) },
1011 	};
1012 	struct skl_wrpll_context ctx;
1013 	unsigned int dco, d, i;
1014 	unsigned int p0, p1, p2;
1015 
1016 	skl_wrpll_context_init(&ctx);
1017 
1018 	for (d = 0; d < ARRAY_SIZE(dividers); d++) {
1019 		for (dco = 0; dco < ARRAY_SIZE(dco_central_freq); dco++) {
1020 			for (i = 0; i < dividers[d].n_dividers; i++) {
1021 				unsigned int p = dividers[d].list[i];
1022 				uint64 dco_freq = p * afe_clock;
1023 
1024 				skl_wrpll_try_divider(&ctx,
1025 						      dco_central_freq[dco],
1026 						      dco_freq,
1027 						      p);
1028 				/*
1029 				 * Skip the remaining dividers if we're sure to
1030 				 * have found the definitive divider, we can't
1031 				 * improve a 0 deviation.
1032 				 */
1033 				if (ctx.min_deviation == 0)
1034 					goto skip_remaining_dividers;
1035 			}
1036 		}
1037 
1038 skip_remaining_dividers:
1039 		/*
1040 		 * If a solution is found with an even divider, prefer
1041 		 * this one.
1042 		 */
1043 		if (d == 0 && ctx.p)
1044 			break;
1045 	}
1046 
1047 	if (!ctx.p) {
1048 		TRACE("%s: No valid divider found for %dHz\n", __func__, clock);
1049 		return false;
1050 	}
1051 	TRACE("%s: Full divider (p) found is %d\n", __func__, ctx.p);
1052 
1053 	/*
1054 	 * gcc incorrectly analyses that these can be used without being
1055 	 * initialized. To be fair, it's hard to guess.
1056 	 */
1057 	p0 = p1 = p2 = 0;
1058 	skl_wrpll_get_multipliers(ctx.p, &p0, &p1, &p2);
1059 	skl_wrpll_params_populate(wrpll_params, afe_clock, ref_clock,
1060 				  ctx.central_freq, p0, p1, p2);
1061 
1062 	return true;
1063 }
1064 
1065