xref: /haiku/src/add-ons/accelerants/intel_extreme/pll.cpp (revision 9a6a20d4689307142a7ed26a1437ba47e244e73f)
1 /*
2  * Copyright 2006-2018, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  *		Alexander von Gluck IV, kallisti5@unixzen.com
8  *		Adrien Destugues, pulkomandy@pulkomandy.tk
9  */
10 
11 
12 #include "pll.h"
13 
14 #include <math.h>
15 #include <stdio.h>
16 #include <string.h>
17 
18 #include <Debug.h>
19 
20 #include <create_display_modes.h>
21 #include <ddc.h>
22 #include <edid.h>
23 #include <validate_display_mode.h>
24 
25 #include "accelerant_protos.h"
26 #include "accelerant.h"
27 #include "utility.h"
28 
29 
30 #undef TRACE
31 #define TRACE_MODE
32 #ifdef TRACE_MODE
33 #	define TRACE(x...) _sPrintf("intel_extreme: " x)
34 #else
35 #	define TRACE(x...)
36 #endif
37 
38 #define ERROR(x...) _sPrintf("intel_extreme: " x)
39 #define CALLED(x...) TRACE("CALLED %s\n", __PRETTY_FUNCTION__)
40 
41 
42 // PLL limits, taken from i915 DRM driver. However, note that we use the values of
43 // N+2, M1+2 and M2+2 here, the - 2 being applied when we write the values to the registers.
44 
45 static pll_limits kLimits85x = {
46 	// p, p1, p2,  n,   m, m1, m2
47 	{  4,  2,  2,  4,  96, 20,  8},
48 	{128, 33,  4, 18, 140, 28, 18},
49 	165000, 908000, 1512000
50 };
51 
52 // For Iron Lake, a new set of timings is introduced along with the FDI system,
53 // and carried on to later cards with just one further change (to the P2 cutoff
54 // frequency) in Sandy Bridge.
55 
56 static pll_limits kLimits9xxSdvo = {
57 	// p, p1, p2,  n,   m, m1, m2
58 	{  5,  1,  5,  3,  70, 10,  5},	// min
59 	{ 80,  8, 10,  8, 120, 20,  9},	// max
60 	200000, 1400000, 2800000
61 };
62 
63 static pll_limits kLimits9xxLvds = {
64 	// p, p1, p2,  n,   m, m1, m2
65 	{  7,  1,  7,  3,  70, 10,  5},	// min
66 	{ 98,  8, 14,  8, 120, 20,  9},	// max
67 	112000, 1400000, 2800000
68 };
69 
// Limits for G45 cards taken from the i915 DRM driver, mixed with the old setup
// plus tests to accommodate lower resolutions while keeping a correct refresh.
// Note that n here is actually n+2; the same applies to m1 and m2.
73 
74 static pll_limits kLimitsG4xSdvo = {
75 	// p, p1, p2,  n,   m, m1, m2
76 	{ 10,  1, 10,  3, 104, 19,  7},	// min
77 	{ 80,  8, 10,  8, 138, 25, 13},	// max
78 	270000, 1750000, 3500000
79 };
80 
81 #if 0
82 static pll_limits kLimitsG4xHdmi = {
83 	// p, p1, p2,  n,   m, m1, m2
84 	{  5,  1,  5,  3, 104, 18,  7},	// min
85 	{ 80,  8, 10,  8, 138, 25, 13},	// max
86 	165000, 1750000, 3500000
87 };
88 #endif
89 
90 static pll_limits kLimitsG4xLvdsSingle = {
91 	// p, p1, p2,  n,   m, m1, m2
92 	{ 28,  2, 14,  3, 104, 19,  7},	// min
93 	{112,  8, 14,  8, 138, 25, 13},	// max
94 	0, 1750000, 3500000
95 };
96 
97 static pll_limits kLimitsG4xLvdsDual = {
98 	// p, p1, p2,  n,   m, m1, m2
99 	{ 14,  2,  7,  3, 104, 19,  7},	// min
100 	{ 42,  6,  7,  8, 138, 25, 13},	// max
101 	0, 1750000, 3500000
102 };
103 
104 static pll_limits kLimitsIlkDac = {
105 	// p, p1, p2, n,   m, m1, m2
106 	{  5,  1,  5, 3,  79, 14,  7}, // min
107 	{ 80,  8, 10, 7, 127, 24, 11}, // max
108 	225000, 1760000, 3510000
109 };
110 
111 static pll_limits kLimitsIlkLvdsSingle = {
112 	// p, p1, p2, n,   m, m1, m2
113 	{ 28,  2, 14, 3,  79, 14,  7}, // min
114 	{112,  8, 14, 5, 118, 24, 11}, // max
115 	225000, 1760000, 3510000
116 };
117 
118 static pll_limits kLimitsIlkLvdsDual = {
119 	// p, p1, p2, n,   m, m1, m2
120 	{ 14,  2,  7, 3,  79, 14,  7}, // min
121 	{ 56,  8,  7, 5, 127, 24, 11}, // max
122 	225000, 1760000, 3510000
123 };
124 
125 // 100Mhz RefClock
126 static pll_limits kLimitsIlkLvdsSingle100 = {
127 	// p, p1, p2, n,   m, m1, m2
128 	{ 28,  2, 14, 3,  79, 14,  7}, // min
129 	{112,  8, 14, 4, 126, 24, 11}, // max
130 	225000, 1760000, 3510000
131 };
132 
133 static pll_limits kLimitsIlkLvdsDual100 = {
134 	// p, p1, p2, n,   m, m1, m2
135 	{ 14,  2,  7, 3,  79, 14,  7}, // min
136 	{ 42,  6,  7, 5, 126, 24, 11}, // max
137 	225000, 1760000, 3510000
138 };
139 
140 // TODO From haswell onwards, a completely different PLL design is used
141 // (intel_gfx-prm-osrc-hsw-display_0.pdf, page 268 for VGA). It uses a "virtual
142 // root frequency" and one just has to set a single divider (integer and
143 // fractional parts), so it makes no sense to reuse the same code and limit
144 // structures there.
145 //
146 // For other display connections, the clock is handled differently, as there is
147 // no need for a precise timing to send things in sync with the display.
#if 0
// Disabled reference tables: nothing in this file consumes them.
// They are kept const like the active tables so they can be enabled as-is.
static const pll_limits kLimitsChv = {
	// p, p1, p2, n,   m, m1, m2
	{  0,  2,  1, 1,  79, 2,   24 << 22}, // min
	{  0,  4, 14, 1, 127, 2,  175 << 22}, // max
	0, 4800000, 6480000
};

static const pll_limits kLimitsVlv = {
	// p, p1, p2, n,   m, m1, m2
	{  0,  2,  2, 1,  79, 2,   11},	// min
	{  0,  3, 20, 7, 127, 3,  156},	// max
	0, 4000000, 6000000
};

static const pll_limits kLimitsBxt = {
	// p, p1, p2, n,  m, m1, m2
	{  0,  2,  1, 1,  0,  2,   2 << 22}, // min
	{  0,  4, 20, 1,  0,  2, 255 << 22}, // max
	0, 4800000, 6700000
};
#endif
170 
171 static pll_limits kLimitsPinSdvo = {
172 	// p, p1, p2, n,   m, m1,  m2
173 	{  5,  1,  5, 3,   2,  0,   0},	// min
174 	{ 80,  8, 10, 6, 256,  0, 254},	// max
175 	200000, 1700000, 3500000
176 };
177 
178 static pll_limits kLimitsPinLvds = {
179 	// p, p1, p2, n,   m, m1,  m2
180 	{  7,  1, 14, 3,   2,  0,   0},	// min
181 	{112,  8, 14, 6, 256,  0, 254},	// max
182 	112000, 1700000, 3500000
183 };
184 
185 
186 static bool
187 lvds_dual_link(display_timing* current)
188 {
189 	float requestedPixelClock = current->pixel_clock / 1000.0f;
190 	if (requestedPixelClock > 112.999)
191 		return true;
192 
193 	// TODO: Force dual link on MacBookPro6,2  MacBookPro8,2  MacBookPro9,1
194 
195 	return ((read32(INTEL_DIGITAL_LVDS_PORT) & LVDS_CLKB_POWER_MASK)
196 		== LVDS_CLKB_POWER_UP);
197 }
198 
199 
200 bool
201 valid_pll_divisors(pll_divisors* divisors, pll_limits* limits)
202 {
203 	pll_info &info = gInfo->shared_info->pll_info;
204 	uint32 vco = info.reference_frequency * divisors->m / divisors->n;
205 	uint32 frequency = vco / divisors->p;
206 
207 	if (divisors->p < limits->min.p || divisors->p > limits->max.p
208 		|| divisors->m < limits->min.m || divisors->m > limits->max.m
209 		|| vco < limits->min_vco || vco > limits->max_vco
210 		|| frequency < info.min_frequency || frequency > info.max_frequency)
211 		return false;
212 
213 	return true;
214 }
215 
216 
217 static void
218 compute_pll_p2(display_timing* current, pll_divisors* divisors,
219 	pll_limits* limits, bool isLVDS)
220 {
221 	if (isLVDS) {
222 		if (lvds_dual_link(current)) {
223 			// fast DAC timing via 2 channels (dual link LVDS)
224 			divisors->p2 = limits->min.p2;
225 		} else {
226 			// slow DAC timing
227 			divisors->p2 = limits->max.p2;
228 		}
229 	} else {
230 		if (current->pixel_clock < limits->dot_limit) {
231 			// slow DAC timing
232 			divisors->p2 = limits->max.p2;
233 		} else {
234 			// fast DAC timing
235 			divisors->p2 = limits->min.p2;
236 		}
237 	}
238 }
239 
240 
241 // TODO we can simplify this computation, with the way the dividers are set, we
242 // know that all values in the valid range for M are reachable. M1 allows to
243 // generate any multiple of 5 in the range and M2 allows to reach the 4 next
244 // values. Therefore, we don't need to loop over the range of values for M1 and
245 // M2 separately, we could instead just loop over possible values for M.
246 // For this to work, the logic of this function must be reversed: for a given M,
247 // it should give the resulting M1 and M2 values for programming the registers.
248 static uint32
249 compute_pll_m(pll_divisors* divisors)
250 {
251 	if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_CHV)
252 		|| gInfo->shared_info->device_type.InGroup(INTEL_GROUP_VLV)) {
253 		return divisors->m1 * divisors->m2;
254 	}
255 
256 	// Pineview, m1 is reserved
257 	if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_PIN))
258 		return divisors->m2;
259 
260 	return 5 * divisors->m1 + divisors->m2;
261 }
262 
263 
264 static uint32
265 compute_pll_p(pll_divisors* divisors)
266 {
267 	return divisors->p1 * divisors->p2;
268 }
269 
270 
271 static void
272 compute_dpll_g4x(display_timing* current, pll_divisors* divisors, bool isLVDS)
273 {
274 	float requestedPixelClock = current->pixel_clock / 1000.0f;
275 	float referenceClock
276 		= gInfo->shared_info->pll_info.reference_frequency / 1000.0f;
277 
278 	TRACE("%s: required MHz: %g, reference clock: %g\n", __func__,
279 		requestedPixelClock, referenceClock);
280 
281 	pll_limits limits;
282 	if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_G4x)) {
283 		// TODO: Pass port type via video_configuration
284 		if (isLVDS) {
285 			if (lvds_dual_link(current))
286 				memcpy(&limits, &kLimitsG4xLvdsDual, sizeof(pll_limits));
287 			else
288 				memcpy(&limits, &kLimitsG4xLvdsSingle, sizeof(pll_limits));
289 		//} else if (type == INTEL_PORT_TYPE_HDMI) {
290 		//	memcpy(&limits, &kLimitsG4xHdmi, sizeof(pll_limits));
291 		} else
292 			memcpy(&limits, &kLimitsG4xSdvo, sizeof(pll_limits));
293 	} else {
294 		// There must be a PCH, so this is ivy bridge or later
295 		if (isLVDS) {
296 			if (lvds_dual_link(current)) {
297 				if (referenceClock == 100.0)
298 					memcpy(&limits, &kLimitsIlkLvdsDual100, sizeof(pll_limits));
299 				else
300 					memcpy(&limits, &kLimitsIlkLvdsDual, sizeof(pll_limits));
301 			} else {
302 				if (referenceClock == 100.0) {
303 					memcpy(&limits, &kLimitsIlkLvdsSingle100,
304 						sizeof(pll_limits));
305 				} else {
306 					memcpy(&limits, &kLimitsIlkLvdsSingle, sizeof(pll_limits));
307 				}
308 			}
309 		} else {
310 			memcpy(&limits, &kLimitsIlkDac, sizeof(pll_limits));
311 		}
312 	}
313 
314 	compute_pll_p2(current, divisors, &limits, isLVDS);
315 
316 	TRACE("PLL limits, min: p %" B_PRId32 " (p1 %" B_PRId32 ", "
317 		"p2 %" B_PRId32 "), n %" B_PRId32 ", m %" B_PRId32 " "
318 		"(m1 %" B_PRId32 ", m2 %" B_PRId32 ")\n", limits.min.p,
319 		limits.min.p1, limits.min.p2, limits.min.n, limits.min.m,
320 		limits.min.m1, limits.min.m2);
321 	TRACE("PLL limits, max: p %" B_PRId32 " (p1 %" B_PRId32 ", "
322 		"p2 %" B_PRId32 "), n %" B_PRId32 ", m %" B_PRId32 " "
323 		"(m1 %" B_PRId32 ", m2 %" B_PRId32 ")\n", limits.max.p,
324 		limits.max.p1, limits.max.p2, limits.max.n, limits.max.m,
325 		limits.max.m1, limits.max.m2);
326 
327 	float best = requestedPixelClock;
328 	pll_divisors bestDivisors;
329 
330 	for (divisors->n = limits.min.n; divisors->n <= limits.max.n;
331 			divisors->n++) {
332 		for (divisors->m1 = limits.max.m1; divisors->m1 >= limits.min.m1;
333 				divisors->m1--) {
334 			for (divisors->m2 = limits.max.m2; divisors->m2 >= limits.min.m2;
335 					divisors->m2--) {
336 				for (divisors->p1 = limits.max.p1;
337 						divisors->p1 >= limits.min.p1; divisors->p1--) {
338 					divisors->m = compute_pll_m(divisors);
339 					divisors->p = compute_pll_p(divisors);
340 
341 					if (!valid_pll_divisors(divisors, &limits))
342 						continue;
343 
344 					float error = fabs(requestedPixelClock
345 						- (referenceClock * divisors->m)
346 						/ (divisors->n * divisors->p));
347 					if (error < best) {
348 						best = error;
349 						bestDivisors = *divisors;
350 
351 						if (error == 0)
352 							break;
353 					}
354 				}
355 			}
356 		}
357 	}
358 	*divisors = bestDivisors;
359 	TRACE("%s: best MHz: %g (error: %g)\n", __func__,
360 		(referenceClock * divisors->m) / (divisors->n * divisors->p),
361 		best);
362 }
363 
364 
365 static void
366 compute_dpll_9xx(display_timing* current, pll_divisors* divisors, bool isLVDS)
367 {
368 	float requestedPixelClock = current->pixel_clock / 1000.0f;
369 	float referenceClock
370 		= gInfo->shared_info->pll_info.reference_frequency / 1000.0f;
371 
372 	TRACE("%s: required MHz: %g\n", __func__, requestedPixelClock);
373 
374 	pll_limits limits;
375 	if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_PIN)) {
376 		if (isLVDS)
377 			memcpy(&limits, &kLimitsPinLvds, sizeof(pll_limits));
378 		else
379 			memcpy(&limits, &kLimitsPinSdvo, sizeof(pll_limits));
380 	} else if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_85x)) {
381 		memcpy(&limits, &kLimits85x, sizeof(pll_limits));
382 	} else {
383 		if (isLVDS)
384 			memcpy(&limits, &kLimits9xxLvds, sizeof(pll_limits));
385 		else
386 			memcpy(&limits, &kLimits9xxSdvo, sizeof(pll_limits));
387 	}
388 
389 	compute_pll_p2(current, divisors, &limits, isLVDS);
390 
391 	TRACE("PLL limits, min: p %" B_PRId32 " (p1 %" B_PRId32 ", "
392 		"p2 %" B_PRId32 "), n %" B_PRId32 ", m %" B_PRId32 " "
393 		"(m1 %" B_PRId32 ", m2 %" B_PRId32 ")\n", limits.min.p,
394 		limits.min.p1, limits.min.p2, limits.min.n, limits.min.m,
395 		limits.min.m1, limits.min.m2);
396 	TRACE("PLL limits, max: p %" B_PRId32 " (p1 %" B_PRId32 ", "
397 		"p2 %" B_PRId32 "), n %" B_PRId32 ", m %" B_PRId32 " "
398 		"(m1 %" B_PRId32 ", m2 %" B_PRId32 ")\n", limits.max.p,
399 		limits.max.p1, limits.max.p2, limits.max.n, limits.max.m,
400 		limits.max.m1, limits.max.m2);
401 
402 	bool is_pine = gInfo->shared_info->device_type.InGroup(INTEL_GROUP_PIN);
403 
404 	float best = requestedPixelClock;
405 	pll_divisors bestDivisors;
406 	memset(&bestDivisors, 0, sizeof(bestDivisors));
407 
408 	for (divisors->m1 = limits.min.m1; divisors->m1 <= limits.max.m1;
409 			divisors->m1++) {
410 		for (divisors->m2 = limits.min.m2; divisors->m2 <= limits.max.m2
411 				&& ((divisors->m2 < divisors->m1) || is_pine); divisors->m2++) {
412 			for (divisors->n = limits.min.n; divisors->n <= limits.max.n;
413 					divisors->n++) {
414 				for (divisors->p1 = limits.min.p1;
415 						divisors->p1 <= limits.max.p1; divisors->p1++) {
416 					divisors->m = compute_pll_m(divisors);
417 					divisors->p = compute_pll_p(divisors);
418 
419 					if (!valid_pll_divisors(divisors, &limits))
420 						continue;
421 
422 					float error = fabs(requestedPixelClock
423 						- (referenceClock * divisors->m)
424 						/ (divisors->n * divisors->p));
425 					if (error < best) {
426 						best = error;
427 						bestDivisors = *divisors;
428 
429 						if (error == 0)
430 							break;
431 					}
432 				}
433 			}
434 		}
435 	}
436 
437 	*divisors = bestDivisors;
438 
439 	if (best == requestedPixelClock)
440 		debugger("No valid PLL configuration found");
441 	else {
442 		TRACE("%s: best MHz: %g (error: %g)\n", __func__,
443 			(referenceClock * divisors->m) / (divisors->n * divisors->p),
444 			best);
445 	}
446 }
447 
448 
449 void
450 compute_pll_divisors(display_timing* current, pll_divisors* divisors, bool isLVDS)
451 {
452 	if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_G4x)
453 		|| (gInfo->shared_info->pch_info != INTEL_PCH_NONE)) {
454 		compute_dpll_g4x(current, divisors, isLVDS);
455 	} else if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_CHV)) {
456 		ERROR("%s: TODO: CherryView\n", __func__);
457 	} else if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_VLV)) {
458 		ERROR("%s: TODO: VallyView\n", __func__);
459 	} else
460 		compute_dpll_9xx(current, divisors, isLVDS);
461 
462 	TRACE("%s: found: p = %" B_PRId32 " (p1 = %" B_PRId32 ", "
463 		"p2 = %" B_PRId32 "), n = %" B_PRId32 ", m = %" B_PRId32 " "
464 		"(m1 = %" B_PRId32 ", m2 = %" B_PRId32 ")\n", __func__,
465 		divisors->p, divisors->p1, divisors->p2, divisors->n,
466 		divisors->m, divisors->m1, divisors->m2);
467 }
468 
469 
470 void
471 refclk_activate_ilk(bool hasPanel)
472 {
473 	CALLED();
474 
475 	bool wantsSSC;
476 	bool hasCK505;
477 	if (gInfo->shared_info->pch_info == INTEL_PCH_IBX) {
478 		TRACE("%s: Generation 5 graphics\n", __func__);
479 		//XXX: This should be == vbt display_clock_mode
480 		hasCK505 = false;
481 		wantsSSC = hasCK505;
482 	} else {
483 		if (gInfo->shared_info->device_type.Generation() == 6) {
484 			TRACE("%s: Generation 6 graphics\n", __func__);
485 		} else {
486 			TRACE("%s: Generation 7 graphics\n", __func__);
487 		}
488 		hasCK505 = false;
489 		wantsSSC = true;
490 	}
491 
492 	uint32 clkRef = read32(PCH_DREF_CONTROL);
493 	uint32 newRef = clkRef;
494 	TRACE("%s: PCH_DREF_CONTROL before: 0x%" B_PRIx32 "\n", __func__, clkRef);
495 
496 	newRef &= ~DREF_NONSPREAD_SOURCE_MASK;
497 
498 	if (hasCK505)
499 		newRef |= DREF_NONSPREAD_CK505_ENABLE;
500 	else
501 		newRef |= DREF_NONSPREAD_SOURCE_ENABLE;
502 
503 	newRef &= ~DREF_SSC_SOURCE_MASK;
504 	newRef &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
505 	newRef &= ~DREF_SSC1_ENABLE;
506 
507 	if (newRef == clkRef) {
508 		TRACE("%s: No changes to reference clock.\n", __func__);
509 		return;
510 	}
511 
512 	if (hasPanel) {
513 		newRef &= ~DREF_SSC_SOURCE_MASK;
514 		newRef |= DREF_SSC_SOURCE_ENABLE;
515 
516 		if (wantsSSC)
517 			newRef |= DREF_SSC1_ENABLE;
518 		else
519 			newRef &= ~DREF_SSC1_ENABLE;
520 
521 		// Power up SSC before enabling outputs
522 		write32(PCH_DREF_CONTROL, newRef);
523 		read32(PCH_DREF_CONTROL);
524 		TRACE("%s: PCH_DREF_CONTROL after SSC on/off: 0x%" B_PRIx32 "\n",
525 				__func__, read32(PCH_DREF_CONTROL));
526 		spin(200);
527 
528 		newRef &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
529 
530 		bool hasEDP = true;
531 		if (hasEDP) {
532 			if (wantsSSC)
533 				newRef |= DREF_CPU_SOURCE_OUTPUT_DOWNSPREAD;
534 			else
535 				newRef |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD;
536 		} else
537 			newRef |= DREF_CPU_SOURCE_OUTPUT_DISABLE;
538 
539 		write32(PCH_DREF_CONTROL, newRef);
540 		read32(PCH_DREF_CONTROL);
541 		TRACE("%s: PCH_DREF_CONTROL after done: 0x%" B_PRIx32 "\n",
542 				__func__, read32(PCH_DREF_CONTROL));
543 		spin(200);
544 	} else {
545 		newRef &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
546 		newRef |= DREF_CPU_SOURCE_OUTPUT_DISABLE;
547 
548 		write32(PCH_DREF_CONTROL, newRef);
549 		read32(PCH_DREF_CONTROL);
550 		TRACE("%s: PCH_DREF_CONTROL after disable CPU output: 0x%" B_PRIx32 "\n",
551 				__func__, read32(PCH_DREF_CONTROL));
552 		spin(200);
553 
554 		if (!wantsSSC) {
555 			newRef &= ~DREF_SSC_SOURCE_MASK;
556 			newRef |= DREF_SSC_SOURCE_DISABLE;
557 			newRef &= ~DREF_SSC1_ENABLE;
558 
559 			write32(PCH_DREF_CONTROL, newRef);
560 			read32(PCH_DREF_CONTROL);
561 			TRACE("%s: PCH_DREF_CONTROL after disable SSC: 0x%" B_PRIx32 "\n",
562 					__func__, read32(PCH_DREF_CONTROL));
563 			spin(200);
564 		}
565 	}
566 }
567 
568 
569 //excerpt (plus modifications) from intel_dpll_mgr.c:
570 
571 /*
572  * Copyright © 2006-2016 Intel Corporation
573  *
574  * Permission is hereby granted, free of charge, to any person obtaining a
575  * copy of this software and associated documentation files (the "Software"),
576  * to deal in the Software without restriction, including without limitation
577  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
578  * and/or sell copies of the Software, and to permit persons to whom the
579  * Software is furnished to do so, subject to the following conditions:
580  *
581  * The above copyright notice and this permission notice (including the next
582  * paragraph) shall be included in all copies or substantial portions of the
583  * Software.
584  *
585  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
586  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
587  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
588  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
589  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
590  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
591  * DEALINGS IN THE SOFTWARE.
592  */
593 
/* LC PLL frequency as used by the WRPLL computation below */
#define LC_FREQ 2700
/*
 * LC_FREQ scaled by 2000, as a 64 bit value. The expansion is fully
 * parenthesized so it behaves as a single operand wherever it is used.
 */
#define LC_FREQ_2K ((uint64)(LC_FREQ * 2000))

/* Range and step of the post divider P tried by hsw_ddi_calculate_wrpll() */
#define P_MIN 2
#define P_MAX 64
#define P_INC 2

/* Constraints for PLL good behavior */
#define REF_MIN 48
#define REF_MAX 400
#define VCO_MIN 2400
#define VCO_MAX 4800
606 
607 static uint64 AbsSubtr64(uint64 nr1, uint64 nr2)
608 {
609 	if (nr1 >= nr2) {
610 		return nr1 - nr2;
611 	} else {
612 		return nr2 - nr1;
613 	}
614 }
615 
/*
 * One (p, n2, r2) candidate of the WRPLL search. p == 0 marks "no candidate
 * stored yet" (see hsw_wrpll_update_rnp()).
 */
struct hsw_wrpll_rnp {
	unsigned p;		/* post divider */
	unsigned n2;	/* 2 * N */
	unsigned r2;	/* 2 * R */
};
619 
/*
 * Returns the deviation budget hsw_wrpll_update_rnp() may tolerate for the
 * given clock. Known clocks map to a fixed budget (0, 1500, 2000, 4000 or
 * 5000); every other clock gets 1000.
 */
static unsigned hsw_wrpll_get_budget_for_freq(int clock)
{
	switch (clock) {
	/* clocks that have to be hit exactly */
	case 25175000:
	case 25200000:
	case 27000000:
	case 27027000:
	case 37762500:
	case 37800000:
	case 40500000:
	case 40541000:
	case 54000000:
	case 54054000:
	case 59341000:
	case 59400000:
	case 72000000:
	case 74176000:
	case 74250000:
	case 81000000:
	case 81081000:
	case 89012000:
	case 89100000:
	case 108000000:
	case 108108000:
	case 111264000:
	case 111375000:
	case 148352000:
	case 148500000:
	case 162000000:
	case 162162000:
	case 222525000:
	case 222750000:
	case 296703000:
	case 297000000:
		return 0;

	case 233500000:
	case 245250000:
	case 247750000:
	case 253250000:
	case 298000000:
		return 1500;

	case 169128000:
	case 169500000:
	case 179500000:
	case 202000000:
		return 2000;

	case 256250000:
	case 262500000:
	case 270000000:
	case 272500000:
	case 273750000:
	case 280750000:
	case 281250000:
	case 286000000:
	case 291750000:
		return 4000;

	case 267250000:
	case 268500000:
		return 5000;

	default:
		break;
	}

	/* anything not listed above */
	return 1000;
}
693 
694 static void hsw_wrpll_update_rnp(uint64 freq2k, unsigned int budget,
695 				 unsigned int r2, unsigned int n2,
696 				 unsigned int p,
697 				 struct hsw_wrpll_rnp *best)
698 {
699 	uint64 a, b, c, d, diff, diff_best;
700 
701 	/* No best (r,n,p) yet */
702 	if (best->p == 0) {
703 		best->p = p;
704 		best->n2 = n2;
705 		best->r2 = r2;
706 		return;
707 	}
708 
709 	/*
710 	 * Output clock is (LC_FREQ_2K / 2000) * N / (P * R), which compares to
711 	 * freq2k.
712 	 *
713 	 * delta = 1e6 *
714 	 *	   abs(freq2k - (LC_FREQ_2K * n2/(p * r2))) /
715 	 *	   freq2k;
716 	 *
717 	 * and we would like delta <= budget.
718 	 *
719 	 * If the discrepancy is above the PPM-based budget, always prefer to
720 	 * improve upon the previous solution.  However, if you're within the
721 	 * budget, try to maximize Ref * VCO, that is N / (P * R^2).
722 	 */
723 	a = freq2k * budget * p * r2;
724 	b = freq2k * budget * best->p * best->r2;
725 	diff = AbsSubtr64((uint64)freq2k * p * r2, LC_FREQ_2K * n2);
726 	diff_best = AbsSubtr64((uint64)freq2k * best->p * best->r2,
727 			     LC_FREQ_2K * best->n2);
728 	c = 1000000 * diff;
729 	d = 1000000 * diff_best;
730 
731 	if (a < c && b < d) {
732 		/* If both are above the budget, pick the closer */
733 		if (best->p * best->r2 * diff < p * r2 * diff_best) {
734 			best->p = p;
735 			best->n2 = n2;
736 			best->r2 = r2;
737 		}
738 	} else if (a >= c && b < d) {
739 		/* If A is below the threshold but B is above it?  Update. */
740 		best->p = p;
741 		best->n2 = n2;
742 		best->r2 = r2;
743 	} else if (a >= c && b >= d) {
744 		/* Both are below the limit, so pick the higher n2/(r2*r2) */
745 		if (n2 * best->r2 * best->r2 > best->n2 * r2 * r2) {
746 			best->p = p;
747 			best->n2 = n2;
748 			best->r2 = r2;
749 		}
750 	}
751 	/* Otherwise a < c && b >= d, do nothing */
752 }
753 
754 void
755 hsw_ddi_calculate_wrpll(int clock /* in Hz */,
756 			unsigned *r2_out, unsigned *n2_out, unsigned *p_out)
757 {
758 	uint64 freq2k;
759 	unsigned p, n2, r2;
760 	struct hsw_wrpll_rnp best = { 0, 0, 0 };
761 	unsigned budget;
762 
763 	freq2k = clock / 100;
764 
765 	budget = hsw_wrpll_get_budget_for_freq(clock);
766 
767 	/* Special case handling for 540 pixel clock: bypass WR PLL entirely
768 	 * and directly pass the LC PLL to it. */
769 	if (freq2k == 5400000) {
770 		*n2_out = 2;
771 		*p_out = 1;
772 		*r2_out = 2;
773 		return;
774 	}
775 
776 	/*
777 	 * Ref = LC_FREQ / R, where Ref is the actual reference input seen by
778 	 * the WR PLL.
779 	 *
780 	 * We want R so that REF_MIN <= Ref <= REF_MAX.
781 	 * Injecting R2 = 2 * R gives:
782 	 *   REF_MAX * r2 > LC_FREQ * 2 and
783 	 *   REF_MIN * r2 < LC_FREQ * 2
784 	 *
785 	 * Which means the desired boundaries for r2 are:
786 	 *  LC_FREQ * 2 / REF_MAX < r2 < LC_FREQ * 2 / REF_MIN
787 	 *
788 	 */
789 	for (r2 = LC_FREQ * 2 / REF_MAX + 1;
790 	     r2 <= LC_FREQ * 2 / REF_MIN;
791 	     r2++) {
792 
793 		/*
794 		 * VCO = N * Ref, that is: VCO = N * LC_FREQ / R
795 		 *
796 		 * Once again we want VCO_MIN <= VCO <= VCO_MAX.
797 		 * Injecting R2 = 2 * R and N2 = 2 * N, we get:
798 		 *   VCO_MAX * r2 > n2 * LC_FREQ and
799 		 *   VCO_MIN * r2 < n2 * LC_FREQ)
800 		 *
801 		 * Which means the desired boundaries for n2 are:
802 		 * VCO_MIN * r2 / LC_FREQ < n2 < VCO_MAX * r2 / LC_FREQ
803 		 */
804 		for (n2 = VCO_MIN * r2 / LC_FREQ + 1;
805 		     n2 <= VCO_MAX * r2 / LC_FREQ;
806 		     n2++) {
807 
808 			for (p = P_MIN; p <= P_MAX; p += P_INC)
809 				hsw_wrpll_update_rnp(freq2k, budget,
810 						     r2, n2, p, &best);
811 		}
812 	}
813 
814 	*n2_out = best.n2;
815 	*p_out = best.p;
816 	*r2_out = best.r2;
817 }
818 
819 struct skl_wrpll_context {
820 	uint64 min_deviation;		/* current minimal deviation */
821 	uint64 central_freq;		/* chosen central freq */
822 	uint64 dco_freq;			/* chosen dco freq */
823 	unsigned int p;				/* chosen divider */
824 };
825 
826 /* DCO freq must be within +1%/-6%  of the DCO central freq */
827 #define SKL_DCO_MAX_PDEVIATION	100
828 #define SKL_DCO_MAX_NDEVIATION	600
829 
830 static void skl_wrpll_try_divider(struct skl_wrpll_context *ctx,
831 				  uint64 central_freq,
832 				  uint64 dco_freq,
833 				  unsigned int divider)
834 {
835 	uint64 deviation;
836 
837 	deviation = ((uint64)10000 * AbsSubtr64(dco_freq, central_freq)
838 			      / central_freq);
839 
840 	/* positive deviation */
841 	if (dco_freq >= central_freq) {
842 		if (deviation < SKL_DCO_MAX_PDEVIATION &&
843 		    deviation < ctx->min_deviation) {
844 			ctx->min_deviation = deviation;
845 			ctx->central_freq = central_freq;
846 			ctx->dco_freq = dco_freq;
847 			ctx->p = divider;
848 
849 			TRACE("%s: DCO central frequency %" B_PRIu64 "Hz\n", __func__, central_freq);
850 			TRACE("%s: DCO frequency %" B_PRIu64 "Hz\n", __func__, dco_freq);
851 			TRACE("%s: positive offset accepted, deviation %" B_PRIu64 "\n",
852 				__func__, deviation);
853 		}
854 	/* negative deviation */
855 	} else if (deviation < SKL_DCO_MAX_NDEVIATION &&
856 		   deviation < ctx->min_deviation) {
857 		ctx->min_deviation = deviation;
858 		ctx->central_freq = central_freq;
859 		ctx->dco_freq = dco_freq;
860 		ctx->p = divider;
861 
862 		TRACE("%s: DCO central frequency %" B_PRIu64 "Hz\n", __func__, central_freq);
863 		TRACE("%s: DCO frequency %" B_PRIu64 "Hz\n", __func__, dco_freq);
864 		TRACE("%s: negative offset accepted, deviation %" B_PRIu64 "\n",
865 			__func__, deviation);
866 	}
867 }
868 
/*
 * Splits the overall divider p into the three factors p0, p1 and p2
 * (p == p0 * p1 * p2 for every divider that is handled).
 * Dividers that match none of the rules leave the outputs untouched.
 */
static void skl_wrpll_get_multipliers(unsigned int p,
				      unsigned int *p0 /* out */,
				      unsigned int *p1 /* out */,
				      unsigned int *p2 /* out */)
{
	if ((p & 1) != 0) {
		/* odd dividers: 3, 5, 7, 9, 15, 21, 35 */
		switch (p) {
		case 3:
		case 9:
			*p0 = 3;
			*p1 = 1;
			*p2 = p / 3;
			break;
		case 5:
		case 7:
			*p0 = p;
			*p1 = 1;
			*p2 = 1;
			break;
		case 15:
			*p0 = 3;
			*p1 = 1;
			*p2 = 5;
			break;
		case 21:
			*p0 = 7;
			*p1 = 1;
			*p2 = 3;
			break;
		case 35:
			*p0 = 7;
			*p1 = 1;
			*p2 = 5;
			break;
		default:
			break;
		}
		return;
	}

	/* even dividers */
	const unsigned int half = p / 2;

	switch (half) {
	case 1:
	case 2:
	case 3:
	case 5:
		*p0 = 2;
		*p1 = 1;
		*p2 = half;
		return;
	default:
		break;
	}

	/* first of 2, 3, 7 that divides half becomes p0; p2 is always 2 */
	unsigned int factor;
	if (half % 2 == 0)
		factor = 2;
	else if (half % 3 == 0)
		factor = 3;
	else if (half % 7 == 0)
		factor = 7;
	else
		return;

	*p0 = factor;
	*p1 = half / factor;
	*p2 = 2;
}
917 
918 static void skl_wrpll_context_init(struct skl_wrpll_context *ctx)
919 {
920 	memset(ctx, 0, sizeof(*ctx));
921 	ctx->min_deviation = UINT64_MAX;
922 }
923 
924 static void skl_wrpll_params_populate(struct skl_wrpll_params *params,
925 				      uint64 afe_clock,
926 				      int ref_clock,
927 				      uint64 central_freq,
928 				      uint32 p0, uint32 p1, uint32 p2)
929 {
930 	uint64 dco_freq;
931 
932 	switch (central_freq) {
933 	case 9600000000ULL:
934 		params->central_freq = 0;
935 		break;
936 	case 9000000000ULL:
937 		params->central_freq = 1;
938 		break;
939 	case 8400000000ULL:
940 		params->central_freq = 3;
941 	}
942 
943 	switch (p0) {
944 	case 1:
945 		params->pdiv = 0;
946 		break;
947 	case 2:
948 		params->pdiv = 1;
949 		break;
950 	case 3:
951 		params->pdiv = 2;
952 		break;
953 	case 7:
954 		params->pdiv = 4;
955 		break;
956 	default:
957 		TRACE("%s: Incorrect PDiv\n", __func__);
958 	}
959 
960 	switch (p2) {
961 	case 5:
962 		params->kdiv = 0;
963 		break;
964 	case 2:
965 		params->kdiv = 1;
966 		break;
967 	case 3:
968 		params->kdiv = 2;
969 		break;
970 	case 1:
971 		params->kdiv = 3;
972 		break;
973 	default:
974 		TRACE("%s: Incorrect KDiv\n", __func__);
975 	}
976 
977 	params->qdiv_ratio = p1;
978 	params->qdiv_mode = (params->qdiv_ratio == 1) ? 0 : 1;
979 
980 	dco_freq = p0 * p1 * p2 * afe_clock;
981 	TRACE("%s: AFE frequency %" B_PRIu64 "Hz\n", __func__, afe_clock);
982 	TRACE("%s: p0: %" B_PRIu32 ", p1: %" B_PRIu32 ", p2: %" B_PRIu32 "\n",
983 		__func__, p0,p1,p2);
984 	TRACE("%s: DCO frequency %" B_PRIu64 "Hz\n", __func__, dco_freq);
985 
986 	/*
987 	 * Intermediate values are in Hz.
988 	 * Divide by MHz to match bsepc
989 	 */
990 	params->dco_integer = (uint64)dco_freq / ((uint64)ref_clock * 1000);
991 	params->dco_fraction = (
992 			(uint64)dco_freq / ((uint64)ref_clock / 1000) -
993 			(uint64)params->dco_integer * 1000000) * 0x8000 /
994 			1000000;
995 
996 	TRACE("%s: Reference clock: %gMhz\n", __func__, ref_clock / 1000.0f);
997 	TRACE("%s: DCO integer %" B_PRIu32 "\n", __func__, params->dco_integer);
998 	TRACE("%s: DCO fraction 0x%" B_PRIx32 "\n", __func__, params->dco_fraction);
999 }
1000 
1001 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
1002 
1003 bool
1004 skl_ddi_calculate_wrpll(int clock /* in Hz */,
1005 			int ref_clock,
1006 			struct skl_wrpll_params *wrpll_params)
1007 {
1008 	uint64 afe_clock = (uint64) clock * 5; /* AFE Clock is 5x Pixel clock */
1009 	uint64 dco_central_freq[3] = { 8400000000ULL,
1010 				    9000000000ULL,
1011 				    9600000000ULL };
1012 	static const int even_dividers[] = {  4,  6,  8, 10, 12, 14, 16, 18, 20,
1013 					     24, 28, 30, 32, 36, 40, 42, 44,
1014 					     48, 52, 54, 56, 60, 64, 66, 68,
1015 					     70, 72, 76, 78, 80, 84, 88, 90,
1016 					     92, 96, 98 };
1017 	static const int odd_dividers[] = { 3, 5, 7, 9, 15, 21, 35 };
1018 	static const struct {
1019 		const int *list;
1020 		unsigned int n_dividers;
1021 	} dividers[] = {
1022 		{ even_dividers, ARRAY_SIZE(even_dividers) },
1023 		{ odd_dividers, ARRAY_SIZE(odd_dividers) },
1024 	};
1025 	struct skl_wrpll_context ctx;
1026 	unsigned int dco, d, i;
1027 	unsigned int p0, p1, p2;
1028 
1029 	skl_wrpll_context_init(&ctx);
1030 
1031 	for (d = 0; d < ARRAY_SIZE(dividers); d++) {
1032 		for (dco = 0; dco < ARRAY_SIZE(dco_central_freq); dco++) {
1033 			for (i = 0; i < dividers[d].n_dividers; i++) {
1034 				unsigned int p = dividers[d].list[i];
1035 				uint64 dco_freq = p * afe_clock;
1036 
1037 				skl_wrpll_try_divider(&ctx,
1038 						      dco_central_freq[dco],
1039 						      dco_freq,
1040 						      p);
1041 				/*
1042 				 * Skip the remaining dividers if we're sure to
1043 				 * have found the definitive divider, we can't
1044 				 * improve a 0 deviation.
1045 				 */
1046 				if (ctx.min_deviation == 0)
1047 					goto skip_remaining_dividers;
1048 			}
1049 		}
1050 
1051 skip_remaining_dividers:
1052 		/*
1053 		 * If a solution is found with an even divider, prefer
1054 		 * this one.
1055 		 */
1056 		if (d == 0 && ctx.p)
1057 			break;
1058 	}
1059 
1060 	if (!ctx.p) {
1061 		TRACE("%s: No valid divider found for %dHz\n", __func__, clock);
1062 		return false;
1063 	}
1064 	TRACE("%s: Full divider (p) found is %d\n", __func__, ctx.p);
1065 
1066 	/*
1067 	 * gcc incorrectly analyses that these can be used without being
1068 	 * initialized. To be fair, it's hard to guess.
1069 	 */
1070 	p0 = p1 = p2 = 0;
1071 	skl_wrpll_get_multipliers(ctx.p, &p0, &p1, &p2);
1072 	skl_wrpll_params_populate(wrpll_params, afe_clock, ref_clock,
1073 				  ctx.central_freq, p0, p1, p2);
1074 
1075 	return true;
1076 }
1077 
1078