xref: /haiku/src/add-ons/accelerants/intel_extreme/pll.cpp (revision 9e25244c5e9051f6cd333820d6332397361abd6c)
1 /*
2  * Copyright 2006-2018, Haiku, Inc. All Rights Reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Axel Dörfler, axeld@pinc-software.de
7  *		Alexander von Gluck IV, kallisti5@unixzen.com
8  *		Adrien Destugues, pulkomandy@pulkomandy.tk
9  */
10 
11 
12 #include "pll.h"
13 
14 #include <math.h>
15 #include <stdio.h>
16 #include <string.h>
17 
18 #include <Debug.h>
19 
20 #include <create_display_modes.h>
21 #include <ddc.h>
22 #include <edid.h>
23 #include <validate_display_mode.h>
24 
25 #include "accelerant_protos.h"
26 #include "accelerant.h"
27 #include "utility.h"
28 
29 
// Debug output helpers. TRACE_MODE is defined unconditionally right below, so
// TRACE() is active in this file; remove the define to compile it out.
#undef TRACE
#define TRACE_MODE
#ifdef TRACE_MODE
#	define TRACE(x...) _sPrintf("intel_extreme: " x)
#else
#	define TRACE(x...)
#endif

// ERROR() prints regardless of TRACE_MODE. CALLED() traces the name of the
// enclosing function; any arguments passed to it are ignored.
#define ERROR(x...) _sPrintf("intel_extreme: " x)
#define CALLED(x...) TRACE("CALLED %s\n", __PRETTY_FUNCTION__)
40 
41 
42 // PLL limits, taken from i915 DRM driver. However, note that we use the values of
43 // N+2, M1+2 and M2+2 here, the - 2 being applied when we write the values to the registers.
44 
45 static pll_limits kLimits85x = {
46 	// p, p1, p2,  n,   m, m1, m2
47 	{  4,  2,  2,  4,  96, 20,  8},
48 	{128, 33,  4, 18, 140, 28, 18},
49 	165000, 908000, 1512000
50 };
51 
52 // For Iron Lake, a new set of timings is introduced along with the FDI system,
53 // and carried on to later cards with just one further change (to the P2 cutoff
54 // frequency) in Sandy Bridge.
55 
56 static pll_limits kLimits9xxSdvo = {
57 	// p, p1, p2,  n,   m, m1, m2
58 	{  5,  1,  5,  3,  70, 10,  5},	// min
59 	{ 80,  8, 10,  8, 120, 20,  9},	// max
60 	200000, 1400000, 2800000
61 };
62 
63 static pll_limits kLimits9xxLvds = {
64 	// p, p1, p2,  n,   m, m1, m2
65 	{  7,  1,  7,  3,  70, 10,  5},	// min
66 	{ 98,  8, 14,  8, 120, 20,  9},	// max
67 	112000, 1400000, 2800000
68 };
69 
70 // Limits for G45 cards taken from i915 DRM driver, mixed with old setup
// plus tests to accommodate lower resolutions with still correct refresh.
72 // Note that n here is actually n+2, same applies to m1 and m2.
73 
74 static pll_limits kLimitsG4xSdvo = {
75 	// p, p1, p2,  n,   m, m1, m2
76 	{ 10,  1, 10,  3, 104, 19,  7},	// min
77 	{ 80,  8, 10,  8, 138, 25, 13},	// max
78 	270000, 1750000, 3500000
79 };
80 
// Disabled: nothing selects these limits yet. compute_dpll_g4x() is not told
// the port type, so its HDMI branch is still commented out.
#if 0
static pll_limits kLimitsG4xHdmi = {
	// p, p1, p2,  n,   m, m1, m2
	{  5,  1,  5,  3, 104, 18,  7},	// min
	{ 80,  8, 10,  8, 138, 25, 13},	// max
	165000, 1750000, 3500000
};
#endif
89 
90 static pll_limits kLimitsG4xLvdsSingle = {
91 	// p, p1, p2,  n,   m, m1, m2
92 	{ 28,  2, 14,  3, 104, 19,  7},	// min
93 	{112,  8, 14,  8, 138, 25, 13},	// max
94 	0, 1750000, 3500000
95 };
96 
97 static pll_limits kLimitsG4xLvdsDual = {
98 	// p, p1, p2,  n,   m, m1, m2
99 	{ 14,  2,  7,  3, 104, 19,  7},	// min
100 	{ 42,  6,  7,  8, 138, 25, 13},	// max
101 	0, 1750000, 3500000
102 };
103 
104 static pll_limits kLimitsIlkDac = {
105 	// p, p1, p2, n,   m, m1, m2
106 	{  5,  1,  5, 3,  79, 14,  7}, // min
107 	{ 80,  8, 10, 7, 127, 24, 11}, // max
108 	225000, 1760000, 3510000
109 };
110 
111 static pll_limits kLimitsIlkLvdsSingle = {
112 	// p, p1, p2, n,   m, m1, m2
113 	{ 28,  2, 14, 3,  79, 14,  7}, // min
114 	{112,  8, 14, 5, 118, 24, 11}, // max
115 	225000, 1760000, 3510000
116 };
117 
118 static pll_limits kLimitsIlkLvdsDual = {
119 	// p, p1, p2, n,   m, m1, m2
120 	{ 14,  2,  7, 3,  79, 14,  7}, // min
121 	{ 56,  8,  7, 5, 127, 24, 11}, // max
122 	225000, 1760000, 3510000
123 };
124 
125 // 100Mhz RefClock
126 static pll_limits kLimitsIlkLvdsSingle100 = {
127 	// p, p1, p2, n,   m, m1, m2
128 	{ 28,  2, 14, 3,  79, 14,  7}, // min
129 	{112,  8, 14, 4, 126, 24, 11}, // max
130 	225000, 1760000, 3510000
131 };
132 
133 static pll_limits kLimitsIlkLvdsDual100 = {
134 	// p, p1, p2, n,   m, m1, m2
135 	{ 14,  2,  7, 3,  79, 14,  7}, // min
136 	{ 42,  6,  7, 5, 126, 24, 11}, // max
137 	225000, 1760000, 3510000
138 };
139 
140 // TODO From haswell onwards, a completely different PLL design is used
141 // (intel_gfx-prm-osrc-hsw-display_0.pdf, page 268 for VGA). It uses a "virtual
142 // root frequency" and one just has to set a single divider (integer and
143 // fractional parts), so it makes no sense to reuse the same code and limit
144 // structures there.
145 //
146 // For other display connections, the clock is handled differently, as there is
147 // no need for a precise timing to send things in sync with the display.
// Disabled: compute_pll_divisors() only logs a TODO for the CherryView and
// ValleyView groups, and nothing in this file references kLimitsBxt.
#if 0
static pll_limits kLimitsChv = {
	// p, p1, p2, n,   m, m1, m2
	{  0,  2,  1, 1,  79, 2,   24 << 22}, // min
	{  0,  4, 14, 1, 127, 2,  175 << 22}, // max
	0, 4800000, 6480000
};

static pll_limits kLimitsVlv = {
	// p, p1, p2, n,   m, m1, m2
	{  0,  2,  2, 1,  79, 2,   11},	// min
	{  0,  3, 20, 7, 127, 3,  156},	// max
	0, 4000000, 6000000
};

static pll_limits kLimitsBxt = {
	// p, p1, p2, n,  m, m1, m2
	{  0,  2,  1, 1,  0,  2,   2 << 22}, // min
	{  0,  4, 20, 1,  0,  2, 255 << 22}, // max
	0, 4800000, 6700000
};
#endif
170 
171 static pll_limits kLimitsPinSdvo = {
172 	// p, p1, p2, n,   m, m1,  m2
173 	{  5,  1,  5, 3,   2,  0,   0},	// min
174 	{ 80,  8, 10, 6, 256,  0, 254},	// max
175 	200000, 1700000, 3500000
176 };
177 
178 static pll_limits kLimitsPinLvds = {
179 	// p, p1, p2, n,   m, m1,  m2
180 	{  7,  1, 14, 3,   2,  0,   0},	// min
181 	{112,  8, 14, 6, 256,  0, 254},	// max
182 	112000, 1700000, 3500000
183 };
184 
185 
186 static bool
187 lvds_dual_link(display_timing* current)
188 {
189 	float requestedPixelClock = current->pixel_clock / 1000.0f;
190 	if (requestedPixelClock > 112.999)
191 		return true;
192 
193 	// TODO: Force dual link on MacBookPro6,2  MacBookPro8,2  MacBookPro9,1
194 
195 	return ((read32(INTEL_DIGITAL_LVDS_PORT) & LVDS_CLKB_POWER_MASK)
196 		== LVDS_CLKB_POWER_UP);
197 }
198 
199 
200 bool
201 valid_pll_divisors(pll_divisors* divisors, pll_limits* limits)
202 {
203 	pll_info &info = gInfo->shared_info->pll_info;
204 	uint32 vco = info.reference_frequency * divisors->m / divisors->n;
205 	uint32 frequency = vco / divisors->p;
206 
207 	if (divisors->p < limits->min.p || divisors->p > limits->max.p
208 		|| divisors->m < limits->min.m || divisors->m > limits->max.m
209 		|| vco < limits->min_vco || vco > limits->max_vco
210 		|| frequency < info.min_frequency || frequency > info.max_frequency)
211 		return false;
212 
213 	return true;
214 }
215 
216 
217 static void
218 compute_pll_p2(display_timing* current, pll_divisors* divisors,
219 	pll_limits* limits, bool isLVDS)
220 {
221 	if (isLVDS) {
222 		if (lvds_dual_link(current)) {
223 			// fast DAC timing via 2 channels (dual link LVDS)
224 			divisors->p2 = limits->min.p2;
225 		} else {
226 			// slow DAC timing
227 			divisors->p2 = limits->max.p2;
228 		}
229 	} else {
230 		if (current->pixel_clock < limits->dot_limit) {
231 			// slow DAC timing
232 			divisors->p2 = limits->max.p2;
233 		} else {
234 			// fast DAC timing
235 			divisors->p2 = limits->min.p2;
236 		}
237 	}
238 }
239 
240 
241 // TODO we can simplify this computation, with the way the dividers are set, we
242 // know that all values in the valid range for M are reachable. M1 allows to
243 // generate any multiple of 5 in the range and M2 allows to reach the 4 next
244 // values. Therefore, we don't need to loop over the range of values for M1 and
245 // M2 separately, we could instead just loop over possible values for M.
246 // For this to work, the logic of this function must be reversed: for a given M,
247 // it should give the resulting M1 and M2 values for programming the registers.
248 static uint32
249 compute_pll_m(pll_divisors* divisors)
250 {
251 	if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_CHV)
252 		|| gInfo->shared_info->device_type.InGroup(INTEL_GROUP_VLV)) {
253 		return divisors->m1 * divisors->m2;
254 	}
255 
256 	// Pineview, m1 is reserved
257 	if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_PIN))
258 		return divisors->m2;
259 
260 	return 5 * divisors->m1 + divisors->m2;
261 }
262 
263 
264 static uint32
265 compute_pll_p(pll_divisors* divisors)
266 {
267 	return divisors->p1 * divisors->p2;
268 }
269 
270 
271 static void
272 compute_dpll_g4x(display_timing* current, pll_divisors* divisors, bool isLVDS)
273 {
274 	float requestedPixelClock = current->pixel_clock / 1000.0f;
275 	float referenceClock
276 		= gInfo->shared_info->pll_info.reference_frequency / 1000.0f;
277 
278 	TRACE("%s: required MHz: %g, reference clock: %g\n", __func__,
279 		requestedPixelClock, referenceClock);
280 
281 	pll_limits limits;
282 	if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_G4x)) {
283 		// TODO: Pass port type via video_configuration
284 		if (isLVDS) {
285 			if (lvds_dual_link(current))
286 				memcpy(&limits, &kLimitsG4xLvdsDual, sizeof(pll_limits));
287 			else
288 				memcpy(&limits, &kLimitsG4xLvdsSingle, sizeof(pll_limits));
289 		//} else if (type == INTEL_PORT_TYPE_HDMI) {
290 		//	memcpy(&limits, &kLimitsG4xHdmi, sizeof(pll_limits));
291 		} else
292 			memcpy(&limits, &kLimitsG4xSdvo, sizeof(pll_limits));
293 	} else {
294 		// There must be a PCH, so this is ivy bridge or later
295 		if (isLVDS) {
296 			if (lvds_dual_link(current)) {
297 				if (referenceClock == 100.0)
298 					memcpy(&limits, &kLimitsIlkLvdsDual100, sizeof(pll_limits));
299 				else
300 					memcpy(&limits, &kLimitsIlkLvdsDual, sizeof(pll_limits));
301 			} else {
302 				if (referenceClock == 100.0) {
303 					memcpy(&limits, &kLimitsIlkLvdsSingle100,
304 						sizeof(pll_limits));
305 				} else {
306 					memcpy(&limits, &kLimitsIlkLvdsSingle, sizeof(pll_limits));
307 				}
308 			}
309 		} else {
310 			memcpy(&limits, &kLimitsIlkDac, sizeof(pll_limits));
311 		}
312 	}
313 
314 	compute_pll_p2(current, divisors, &limits, isLVDS);
315 
316 	TRACE("PLL limits, min: p %" B_PRId32 " (p1 %" B_PRId32 ", "
317 		"p2 %" B_PRId32 "), n %" B_PRId32 ", m %" B_PRId32 " "
318 		"(m1 %" B_PRId32 ", m2 %" B_PRId32 ")\n", limits.min.p,
319 		limits.min.p1, limits.min.p2, limits.min.n, limits.min.m,
320 		limits.min.m1, limits.min.m2);
321 	TRACE("PLL limits, max: p %" B_PRId32 " (p1 %" B_PRId32 ", "
322 		"p2 %" B_PRId32 "), n %" B_PRId32 ", m %" B_PRId32 " "
323 		"(m1 %" B_PRId32 ", m2 %" B_PRId32 ")\n", limits.max.p,
324 		limits.max.p1, limits.max.p2, limits.max.n, limits.max.m,
325 		limits.max.m1, limits.max.m2);
326 
327 	float best = requestedPixelClock;
328 	pll_divisors bestDivisors;
329 
330 	for (divisors->n = limits.min.n; divisors->n <= limits.max.n;
331 			divisors->n++) {
332 		for (divisors->m1 = limits.max.m1; divisors->m1 >= limits.min.m1;
333 				divisors->m1--) {
334 			for (divisors->m2 = limits.max.m2; divisors->m2 >= limits.min.m2;
335 					divisors->m2--) {
336 				for (divisors->p1 = limits.max.p1;
337 						divisors->p1 >= limits.min.p1; divisors->p1--) {
338 					divisors->m = compute_pll_m(divisors);
339 					divisors->p = compute_pll_p(divisors);
340 
341 					if (!valid_pll_divisors(divisors, &limits))
342 						continue;
343 
344 					float error = fabs(requestedPixelClock
345 						- (referenceClock * divisors->m)
346 						/ (divisors->n * divisors->p));
347 					if (error < best) {
348 						best = error;
349 						bestDivisors = *divisors;
350 
351 						if (error == 0)
352 							break;
353 					}
354 				}
355 			}
356 		}
357 	}
358 	*divisors = bestDivisors;
359 	TRACE("%s: best MHz: %g (error: %g)\n", __func__,
360 		(referenceClock * divisors->m) / (divisors->n * divisors->p),
361 		best);
362 }
363 
364 
365 static void
366 compute_dpll_9xx(display_timing* current, pll_divisors* divisors, bool isLVDS)
367 {
368 	float requestedPixelClock = current->pixel_clock / 1000.0f;
369 	float referenceClock
370 		= gInfo->shared_info->pll_info.reference_frequency / 1000.0f;
371 
372 	TRACE("%s: required MHz: %g\n", __func__, requestedPixelClock);
373 
374 	pll_limits limits;
375 	if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_PIN)) {
376 		if (isLVDS)
377 			memcpy(&limits, &kLimitsPinLvds, sizeof(pll_limits));
378 		else
379 			memcpy(&limits, &kLimitsPinSdvo, sizeof(pll_limits));
380 	} else if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_85x)) {
381 		memcpy(&limits, &kLimits85x, sizeof(pll_limits));
382 	} else {
383 		if (isLVDS)
384 			memcpy(&limits, &kLimits9xxLvds, sizeof(pll_limits));
385 		else
386 			memcpy(&limits, &kLimits9xxSdvo, sizeof(pll_limits));
387 	}
388 
389 	compute_pll_p2(current, divisors, &limits, isLVDS);
390 
391 	TRACE("PLL limits, min: p %" B_PRId32 " (p1 %" B_PRId32 ", "
392 		"p2 %" B_PRId32 "), n %" B_PRId32 ", m %" B_PRId32 " "
393 		"(m1 %" B_PRId32 ", m2 %" B_PRId32 ")\n", limits.min.p,
394 		limits.min.p1, limits.min.p2, limits.min.n, limits.min.m,
395 		limits.min.m1, limits.min.m2);
396 	TRACE("PLL limits, max: p %" B_PRId32 " (p1 %" B_PRId32 ", "
397 		"p2 %" B_PRId32 "), n %" B_PRId32 ", m %" B_PRId32 " "
398 		"(m1 %" B_PRId32 ", m2 %" B_PRId32 ")\n", limits.max.p,
399 		limits.max.p1, limits.max.p2, limits.max.n, limits.max.m,
400 		limits.max.m1, limits.max.m2);
401 
402 	bool is_pine = gInfo->shared_info->device_type.InGroup(INTEL_GROUP_PIN);
403 
404 	float best = requestedPixelClock;
405 	pll_divisors bestDivisors;
406 	memset(&bestDivisors, 0, sizeof(bestDivisors));
407 
408 	for (divisors->m1 = limits.min.m1; divisors->m1 <= limits.max.m1;
409 			divisors->m1++) {
410 		for (divisors->m2 = limits.min.m2; divisors->m2 <= limits.max.m2
411 				&& ((divisors->m2 < divisors->m1) || is_pine); divisors->m2++) {
412 			for (divisors->n = limits.min.n; divisors->n <= limits.max.n;
413 					divisors->n++) {
414 				for (divisors->p1 = limits.min.p1;
415 						divisors->p1 <= limits.max.p1; divisors->p1++) {
416 					divisors->m = compute_pll_m(divisors);
417 					divisors->p = compute_pll_p(divisors);
418 
419 					if (!valid_pll_divisors(divisors, &limits))
420 						continue;
421 
422 					float error = fabs(requestedPixelClock
423 						- (referenceClock * divisors->m)
424 						/ (divisors->n * divisors->p));
425 					if (error < best) {
426 						best = error;
427 						bestDivisors = *divisors;
428 
429 						if (error == 0)
430 							break;
431 					}
432 				}
433 			}
434 		}
435 	}
436 
437 	*divisors = bestDivisors;
438 
439 	if (best == requestedPixelClock)
440 		debugger("No valid PLL configuration found");
441 	else {
442 		TRACE("%s: best MHz: %g (error: %g)\n", __func__,
443 			(referenceClock * divisors->m) / (divisors->n * divisors->p),
444 			best);
445 	}
446 }
447 
448 
449 void
450 compute_pll_divisors(display_timing* current, pll_divisors* divisors, bool isLVDS)
451 {
452 	if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_G4x)
453 		|| (gInfo->shared_info->pch_info != INTEL_PCH_NONE)) {
454 		compute_dpll_g4x(current, divisors, isLVDS);
455 	} else if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_CHV)) {
456 		ERROR("%s: TODO: CherryView\n", __func__);
457 	} else if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_VLV)) {
458 		ERROR("%s: TODO: VallyView\n", __func__);
459 	} else
460 		compute_dpll_9xx(current, divisors, isLVDS);
461 
462 	TRACE("%s: found: p = %" B_PRId32 " (p1 = %" B_PRId32 ", "
463 		"p2 = %" B_PRId32 "), n = %" B_PRId32 ", m = %" B_PRId32 " "
464 		"(m1 = %" B_PRId32 ", m2 = %" B_PRId32 ")\n", __func__,
465 		divisors->p, divisors->p1, divisors->p2, divisors->n,
466 		divisors->m, divisors->m1, divisors->m2);
467 }
468 
469 
/*!	Programs the PCH display reference clock register (PCH_DREF_CONTROL).

	With \a hasPanel set, the SSC source is enabled first and the CPU source
	output afterwards; without it, the CPU source output is disabled and,
	if SSC is not wanted, the SSC source as well. Every register write is
	followed by a read back and a spin(200).

	\param hasPanel Whether a panel is attached that needs the SSC source and
		the CPU source output.
*/
void
refclk_activate_ilk(bool hasPanel)
{
	CALLED();

	// aka, our engineers hate you

	// hasCK505 is false on every path below, so the CK505 source is never
	// selected at the moment. wantsSSC ends up false on IBX, true otherwise.
	bool wantsSSC;
	bool hasCK505;
	if (gInfo->shared_info->pch_info == INTEL_PCH_IBX) {
		TRACE("%s: Generation 5 graphics\n", __func__);
		//XXX: This should be == vbt display_clock_mode
		hasCK505 = false;
		wantsSSC = hasCK505;
	} else {
		if (gInfo->shared_info->device_type.Generation() == 6) {
			TRACE("%s: Generation 6 graphics\n", __func__);
		} else {
			TRACE("%s: Generation 7 graphics\n", __func__);
		}
		hasCK505 = false;
		wantsSSC = true;
	}

	uint32 clkRef = read32(PCH_DREF_CONTROL);
	uint32 newRef = clkRef;
	TRACE("%s: PCH_DREF_CONTROL before: 0x%" B_PRIx32 "\n", __func__, clkRef);

	// Select the non-spread source.
	newRef &= ~DREF_NONSPREAD_SOURCE_MASK;

	if (hasCK505)
		newRef |= DREF_NONSPREAD_CK505_ENABLE;
	else
		newRef |= DREF_NONSPREAD_SOURCE_ENABLE;

	// Start out with the SSC source, CPU source output and SSC1 fields cleared.
	newRef &= ~DREF_SSC_SOURCE_MASK;
	newRef &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
	newRef &= ~DREF_SSC1_ENABLE;

	// NOTE(review): this compares against the value with the fields above
	// cleared, i.e. before the hasPanel specific bits are set, so it only
	// matches when those fields were already clear in the register.
	if (newRef == clkRef) {
		TRACE("%s: No changes to reference clock.\n", __func__);
		return;
	}

	if (hasPanel) {
		newRef &= ~DREF_SSC_SOURCE_MASK;
		newRef |= DREF_SSC_SOURCE_ENABLE;

		if (wantsSSC)
			newRef |= DREF_SSC1_ENABLE;
		else
			newRef &= ~DREF_SSC1_ENABLE;

		// Power up SSC before enabling outputs
		write32(PCH_DREF_CONTROL, newRef);
		// read back after the write (presumably to flush it), then wait
		read32(PCH_DREF_CONTROL);
		TRACE("%s: PCH_DREF_CONTROL after SSC on/off: 0x%" B_PRIx32 "\n",
				__func__, read32(PCH_DREF_CONTROL));
		spin(200);

		newRef &= ~DREF_CPU_SOURCE_OUTPUT_MASK;

		// hasEDP is hard-coded, so the "disable" branch is currently dead.
		bool hasEDP = true;
		if (hasEDP) {
			if (wantsSSC)
				newRef |= DREF_CPU_SOURCE_OUTPUT_DOWNSPREAD;
			else
				newRef |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD;
		} else
			newRef |= DREF_CPU_SOURCE_OUTPUT_DISABLE;

		write32(PCH_DREF_CONTROL, newRef);
		read32(PCH_DREF_CONTROL);
		TRACE("%s: PCH_DREF_CONTROL after done: 0x%" B_PRIx32 "\n",
				__func__, read32(PCH_DREF_CONTROL));
		spin(200);
	} else {
		// No panel: turn the CPU source output off first.
		newRef &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
		newRef |= DREF_CPU_SOURCE_OUTPUT_DISABLE;

		write32(PCH_DREF_CONTROL, newRef);
		read32(PCH_DREF_CONTROL);
		TRACE("%s: PCH_DREF_CONTROL after disable CPU output: 0x%" B_PRIx32 "\n",
				__func__, read32(PCH_DREF_CONTROL));
		spin(200);

		// Then, in a separate write, the SSC source if nothing wants it.
		if (!wantsSSC) {
			newRef &= ~DREF_SSC_SOURCE_MASK;
			newRef |= DREF_SSC_SOURCE_DISABLE;
			newRef &= ~DREF_SSC1_ENABLE;

			write32(PCH_DREF_CONTROL, newRef);
			read32(PCH_DREF_CONTROL);
			TRACE("%s: PCH_DREF_CONTROL after disable SSC: 0x%" B_PRIx32 "\n",
					__func__, read32(PCH_DREF_CONTROL));
			spin(200);
		}
	}
}
569 
570 
571 //excerpt (plus modifications) from intel_dpll_mgr.c:
572 
573 /*
574  * Copyright © 2006-2016 Intel Corporation
575  *
576  * Permission is hereby granted, free of charge, to any person obtaining a
577  * copy of this software and associated documentation files (the "Software"),
578  * to deal in the Software without restriction, including without limitation
579  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
580  * and/or sell copies of the Software, and to permit persons to whom the
581  * Software is furnished to do so, subject to the following conditions:
582  *
583  * The above copyright notice and this permission notice (including the next
584  * paragraph) shall be included in all copies or substantial portions of the
585  * Software.
586  *
587  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
588  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
589  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
590  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
591  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
592  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
593  * DEALINGS IN THE SOFTWARE.
594  */
595 
/* LC PLL frequency; hsw_ddi_calculate_wrpll() derives Ref = LC_FREQ / R */
#define LC_FREQ 2700
/* LC_FREQ scaled by 2000 as a 64-bit value, used in the freq2k comparisons */
#define LC_FREQ_2K (uint64)(LC_FREQ * 2000)

/* Range and step of the P divider tried by hsw_ddi_calculate_wrpll() */
#define P_MIN 2
#define P_MAX 64
#define P_INC 2

/* Constraints for PLL good behavior */
/* Bounds for Ref and for VCO = N * Ref, in the same units as LC_FREQ */
#define REF_MIN 48
#define REF_MAX 400
#define VCO_MIN 2400
#define VCO_MAX 4800
608 
609 static uint64 AbsSubtr64(uint64 nr1, uint64 nr2)
610 {
611 	if (nr1 >= nr2) {
612 		return nr1 - nr2;
613 	} else {
614 		return nr2 - nr1;
615 	}
616 }
617 
/* One WRPLL candidate; p == 0 marks "nothing stored yet". */
struct hsw_wrpll_rnp {
	unsigned p;		/* post divider P */
	unsigned n2;	/* 2 * N */
	unsigned r2;	/* 2 * R */
};
621 
/*
 * Returns the allowed deviation (in PPM) between the requested and the
 * generated clock for the given pixel clock in Hz. Clocks without a table
 * entry get a budget of 1000.
 */
static unsigned hsw_wrpll_get_budget_for_freq(int clock)
{
	switch (clock) {
	/* clocks that have to be hit exactly */
	case 25175000:
	case 25200000:
	case 27000000:
	case 27027000:
	case 37762500:
	case 37800000:
	case 40500000:
	case 40541000:
	case 54000000:
	case 54054000:
	case 59341000:
	case 59400000:
	case 72000000:
	case 74176000:
	case 74250000:
	case 81000000:
	case 81081000:
	case 89012000:
	case 89100000:
	case 108000000:
	case 108108000:
	case 111264000:
	case 111375000:
	case 148352000:
	case 148500000:
	case 162000000:
	case 162162000:
	case 222525000:
	case 222750000:
	case 296703000:
	case 297000000:
		return 0;

	case 233500000:
	case 245250000:
	case 247750000:
	case 253250000:
	case 298000000:
		return 1500;

	case 169128000:
	case 169500000:
	case 179500000:
	case 202000000:
		return 2000;

	case 256250000:
	case 262500000:
	case 270000000:
	case 272500000:
	case 273750000:
	case 280750000:
	case 281250000:
	case 286000000:
	case 291750000:
		return 4000;

	case 267250000:
	case 268500000:
		return 5000;

	default:
		return 1000;
	}
}
695 
696 static void hsw_wrpll_update_rnp(uint64 freq2k, unsigned int budget,
697 				 unsigned int r2, unsigned int n2,
698 				 unsigned int p,
699 				 struct hsw_wrpll_rnp *best)
700 {
701 	uint64 a, b, c, d, diff, diff_best;
702 
703 	/* No best (r,n,p) yet */
704 	if (best->p == 0) {
705 		best->p = p;
706 		best->n2 = n2;
707 		best->r2 = r2;
708 		return;
709 	}
710 
711 	/*
712 	 * Output clock is (LC_FREQ_2K / 2000) * N / (P * R), which compares to
713 	 * freq2k.
714 	 *
715 	 * delta = 1e6 *
716 	 *	   abs(freq2k - (LC_FREQ_2K * n2/(p * r2))) /
717 	 *	   freq2k;
718 	 *
719 	 * and we would like delta <= budget.
720 	 *
721 	 * If the discrepancy is above the PPM-based budget, always prefer to
722 	 * improve upon the previous solution.  However, if you're within the
723 	 * budget, try to maximize Ref * VCO, that is N / (P * R^2).
724 	 */
725 	a = freq2k * budget * p * r2;
726 	b = freq2k * budget * best->p * best->r2;
727 	diff = AbsSubtr64((uint64)freq2k * p * r2, LC_FREQ_2K * n2);
728 	diff_best = AbsSubtr64((uint64)freq2k * best->p * best->r2,
729 			     LC_FREQ_2K * best->n2);
730 	c = 1000000 * diff;
731 	d = 1000000 * diff_best;
732 
733 	if (a < c && b < d) {
734 		/* If both are above the budget, pick the closer */
735 		if (best->p * best->r2 * diff < p * r2 * diff_best) {
736 			best->p = p;
737 			best->n2 = n2;
738 			best->r2 = r2;
739 		}
740 	} else if (a >= c && b < d) {
741 		/* If A is below the threshold but B is above it?  Update. */
742 		best->p = p;
743 		best->n2 = n2;
744 		best->r2 = r2;
745 	} else if (a >= c && b >= d) {
746 		/* Both are below the limit, so pick the higher n2/(r2*r2) */
747 		if (n2 * best->r2 * best->r2 > best->n2 * r2 * r2) {
748 			best->p = p;
749 			best->n2 = n2;
750 			best->r2 = r2;
751 		}
752 	}
753 	/* Otherwise a < c && b >= d, do nothing */
754 }
755 
756 void
757 hsw_ddi_calculate_wrpll(int clock /* in Hz */,
758 			unsigned *r2_out, unsigned *n2_out, unsigned *p_out)
759 {
760 	uint64 freq2k;
761 	unsigned p, n2, r2;
762 	struct hsw_wrpll_rnp best = { 0, 0, 0 };
763 	unsigned budget;
764 
765 	freq2k = clock / 100;
766 
767 	budget = hsw_wrpll_get_budget_for_freq(clock);
768 
769 	/* Special case handling for 540 pixel clock: bypass WR PLL entirely
770 	 * and directly pass the LC PLL to it. */
771 	if (freq2k == 5400000) {
772 		*n2_out = 2;
773 		*p_out = 1;
774 		*r2_out = 2;
775 		return;
776 	}
777 
778 	/*
779 	 * Ref = LC_FREQ / R, where Ref is the actual reference input seen by
780 	 * the WR PLL.
781 	 *
782 	 * We want R so that REF_MIN <= Ref <= REF_MAX.
783 	 * Injecting R2 = 2 * R gives:
784 	 *   REF_MAX * r2 > LC_FREQ * 2 and
785 	 *   REF_MIN * r2 < LC_FREQ * 2
786 	 *
787 	 * Which means the desired boundaries for r2 are:
788 	 *  LC_FREQ * 2 / REF_MAX < r2 < LC_FREQ * 2 / REF_MIN
789 	 *
790 	 */
791 	for (r2 = LC_FREQ * 2 / REF_MAX + 1;
792 	     r2 <= LC_FREQ * 2 / REF_MIN;
793 	     r2++) {
794 
795 		/*
796 		 * VCO = N * Ref, that is: VCO = N * LC_FREQ / R
797 		 *
798 		 * Once again we want VCO_MIN <= VCO <= VCO_MAX.
799 		 * Injecting R2 = 2 * R and N2 = 2 * N, we get:
800 		 *   VCO_MAX * r2 > n2 * LC_FREQ and
801 		 *   VCO_MIN * r2 < n2 * LC_FREQ)
802 		 *
803 		 * Which means the desired boundaries for n2 are:
804 		 * VCO_MIN * r2 / LC_FREQ < n2 < VCO_MAX * r2 / LC_FREQ
805 		 */
806 		for (n2 = VCO_MIN * r2 / LC_FREQ + 1;
807 		     n2 <= VCO_MAX * r2 / LC_FREQ;
808 		     n2++) {
809 
810 			for (p = P_MIN; p <= P_MAX; p += P_INC)
811 				hsw_wrpll_update_rnp(freq2k, budget,
812 						     r2, n2, p, &best);
813 		}
814 	}
815 
816 	*n2_out = best.n2;
817 	*p_out = best.p;
818 	*r2_out = best.r2;
819 }
820 
/*
 * Best divider found so far by skl_ddi_calculate_wrpll(). p stays 0 until
 * skl_wrpll_try_divider() accepts a candidate.
 */
struct skl_wrpll_context {
	uint64 min_deviation;		/* current minimal deviation */
	uint64 central_freq;		/* chosen central freq */
	uint64 dco_freq;			/* chosen dco freq */
	unsigned int p;				/* chosen divider */
};
827 
/* DCO freq must be within +1%/-6%  of the DCO central freq */
/*
 * Both limits are in units of 0.01%: skl_wrpll_try_divider() scales the
 * relative deviation by 10000 and requires it to be strictly below them.
 */
#define SKL_DCO_MAX_PDEVIATION	100
#define SKL_DCO_MAX_NDEVIATION	600
831 
832 static void skl_wrpll_try_divider(struct skl_wrpll_context *ctx,
833 				  uint64 central_freq,
834 				  uint64 dco_freq,
835 				  unsigned int divider)
836 {
837 	uint64 deviation;
838 
839 	deviation = ((uint64)10000 * AbsSubtr64(dco_freq, central_freq)
840 			      / central_freq);
841 
842 	/* positive deviation */
843 	if (dco_freq >= central_freq) {
844 		if (deviation < SKL_DCO_MAX_PDEVIATION &&
845 		    deviation < ctx->min_deviation) {
846 			ctx->min_deviation = deviation;
847 			ctx->central_freq = central_freq;
848 			ctx->dco_freq = dco_freq;
849 			ctx->p = divider;
850 
851 			TRACE("%s: DCO central frequency %" B_PRIu64 "Hz\n", __func__, central_freq);
852 			TRACE("%s: DCO frequency %" B_PRIu64 "Hz\n", __func__, dco_freq);
853 			TRACE("%s: positive offset accepted, deviation %" B_PRIu64 "\n",
854 				__func__, deviation);
855 		}
856 	/* negative deviation */
857 	} else if (deviation < SKL_DCO_MAX_NDEVIATION &&
858 		   deviation < ctx->min_deviation) {
859 		ctx->min_deviation = deviation;
860 		ctx->central_freq = central_freq;
861 		ctx->dco_freq = dco_freq;
862 		ctx->p = divider;
863 
864 		TRACE("%s: DCO central frequency %" B_PRIu64 "Hz\n", __func__, central_freq);
865 		TRACE("%s: DCO frequency %" B_PRIu64 "Hz\n", __func__, dco_freq);
866 		TRACE("%s: negative offset accepted, deviation %" B_PRIu64 "\n",
867 			__func__, deviation);
868 	}
869 }
870 
/*
 * Splits the full divider p into the three factors p0, p1 and p2 so that
 * p0 * p1 * p2 == p. Dividers that match none of the known patterns leave
 * the outputs untouched, so callers should preset them.
 */
static void skl_wrpll_get_multipliers(unsigned int p,
				      unsigned int *p0 /* out */,
				      unsigned int *p1 /* out */,
				      unsigned int *p2 /* out */)
{
	if (p % 2 != 0) {
		/* odd dividers: 3, 5, 7, 9, 15, 21, 35 */
		switch (p) {
		case 3:
		case 9:
			*p0 = 3;
			*p1 = 1;
			*p2 = p / 3;
			break;
		case 5:
		case 7:
			*p0 = p;
			*p1 = 1;
			*p2 = 1;
			break;
		case 15:
			*p0 = 3;
			*p1 = 1;
			*p2 = 5;
			break;
		case 21:
			*p0 = 7;
			*p1 = 1;
			*p2 = 3;
			break;
		case 35:
			*p0 = 7;
			*p1 = 1;
			*p2 = 5;
			break;
		default:
			break;
		}
		return;
	}

	/* even dividers */
	const unsigned int half = p / 2;

	if (half == 1 || half == 2 || half == 3 || half == 5) {
		*p0 = 2;
		*p1 = 1;
		*p2 = half;
		return;
	}

	/* otherwise p2 is fixed at 2 and half is split into p0 * p1 */
	unsigned int first;
	if (half % 2 == 0)
		first = 2;
	else if (half % 3 == 0)
		first = 3;
	else if (half % 7 == 0)
		first = 7;
	else
		return;

	*p0 = first;
	*p1 = half / first;
	*p2 = 2;
}
919 
/*
 * Resets ctx to "nothing found": every field is zeroed, then min_deviation
 * is set to the largest value so that any accepted candidate improves on it.
 */
static void skl_wrpll_context_init(struct skl_wrpll_context *ctx)
{
	memset(ctx, 0, sizeof(*ctx));
	ctx->min_deviation = UINT64_MAX;
}
925 
926 static void skl_wrpll_params_populate(struct skl_wrpll_params *params,
927 				      uint64 afe_clock,
928 				      int ref_clock,
929 				      uint64 central_freq,
930 				      uint32 p0, uint32 p1, uint32 p2)
931 {
932 	uint64 dco_freq;
933 
934 	switch (central_freq) {
935 	case 9600000000ULL:
936 		params->central_freq = 0;
937 		break;
938 	case 9000000000ULL:
939 		params->central_freq = 1;
940 		break;
941 	case 8400000000ULL:
942 		params->central_freq = 3;
943 	}
944 
945 	switch (p0) {
946 	case 1:
947 		params->pdiv = 0;
948 		break;
949 	case 2:
950 		params->pdiv = 1;
951 		break;
952 	case 3:
953 		params->pdiv = 2;
954 		break;
955 	case 7:
956 		params->pdiv = 4;
957 		break;
958 	default:
959 		TRACE("%s: Incorrect PDiv\n", __func__);
960 	}
961 
962 	switch (p2) {
963 	case 5:
964 		params->kdiv = 0;
965 		break;
966 	case 2:
967 		params->kdiv = 1;
968 		break;
969 	case 3:
970 		params->kdiv = 2;
971 		break;
972 	case 1:
973 		params->kdiv = 3;
974 		break;
975 	default:
976 		TRACE("%s: Incorrect KDiv\n", __func__);
977 	}
978 
979 	params->qdiv_ratio = p1;
980 	params->qdiv_mode = (params->qdiv_ratio == 1) ? 0 : 1;
981 
982 	dco_freq = p0 * p1 * p2 * afe_clock;
983 	TRACE("%s: AFE frequency %" B_PRIu64 "Hz\n", __func__, afe_clock);
984 	TRACE("%s: p0: %" B_PRIu32 ", p1: %" B_PRIu32 ", p2: %" B_PRIu32 "\n",
985 		__func__, p0,p1,p2);
986 	TRACE("%s: DCO frequency %" B_PRIu64 "Hz\n", __func__, dco_freq);
987 
988 	/*
989 	 * Intermediate values are in Hz.
990 	 * Divide by MHz to match bsepc
991 	 */
992 	params->dco_integer = (uint64)dco_freq / ((uint64)ref_clock * 1000);
993 	params->dco_fraction = (
994 			(uint64)dco_freq / ((uint64)ref_clock / 1000) -
995 			(uint64)params->dco_integer * 1000000) * 0x8000 /
996 			1000000;
997 
998 	TRACE("%s: Reference clock: %gMhz\n", __func__, ref_clock / 1000.0f);
999 	TRACE("%s: DCO integer %" B_PRIu32 "\n", __func__, params->dco_integer);
1000 	TRACE("%s: DCO fraction 0x%" B_PRIx32 "\n", __func__, params->dco_fraction);
1001 }
1002 
1003 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
1004 
1005 bool
1006 skl_ddi_calculate_wrpll(int clock /* in Hz */,
1007 			int ref_clock,
1008 			struct skl_wrpll_params *wrpll_params)
1009 {
1010 	uint64 afe_clock = (uint64) clock * 5; /* AFE Clock is 5x Pixel clock */
1011 	uint64 dco_central_freq[3] = { 8400000000ULL,
1012 				    9000000000ULL,
1013 				    9600000000ULL };
1014 	static const int even_dividers[] = {  4,  6,  8, 10, 12, 14, 16, 18, 20,
1015 					     24, 28, 30, 32, 36, 40, 42, 44,
1016 					     48, 52, 54, 56, 60, 64, 66, 68,
1017 					     70, 72, 76, 78, 80, 84, 88, 90,
1018 					     92, 96, 98 };
1019 	static const int odd_dividers[] = { 3, 5, 7, 9, 15, 21, 35 };
1020 	static const struct {
1021 		const int *list;
1022 		unsigned int n_dividers;
1023 	} dividers[] = {
1024 		{ even_dividers, ARRAY_SIZE(even_dividers) },
1025 		{ odd_dividers, ARRAY_SIZE(odd_dividers) },
1026 	};
1027 	struct skl_wrpll_context ctx;
1028 	unsigned int dco, d, i;
1029 	unsigned int p0, p1, p2;
1030 
1031 	skl_wrpll_context_init(&ctx);
1032 
1033 	for (d = 0; d < ARRAY_SIZE(dividers); d++) {
1034 		for (dco = 0; dco < ARRAY_SIZE(dco_central_freq); dco++) {
1035 			for (i = 0; i < dividers[d].n_dividers; i++) {
1036 				unsigned int p = dividers[d].list[i];
1037 				uint64 dco_freq = p * afe_clock;
1038 
1039 				skl_wrpll_try_divider(&ctx,
1040 						      dco_central_freq[dco],
1041 						      dco_freq,
1042 						      p);
1043 				/*
1044 				 * Skip the remaining dividers if we're sure to
1045 				 * have found the definitive divider, we can't
1046 				 * improve a 0 deviation.
1047 				 */
1048 				if (ctx.min_deviation == 0)
1049 					goto skip_remaining_dividers;
1050 			}
1051 		}
1052 
1053 skip_remaining_dividers:
1054 		/*
1055 		 * If a solution is found with an even divider, prefer
1056 		 * this one.
1057 		 */
1058 		if (d == 0 && ctx.p)
1059 			break;
1060 	}
1061 
1062 	if (!ctx.p) {
1063 		TRACE("%s: No valid divider found for %dHz\n", __func__, clock);
1064 		return false;
1065 	}
1066 	TRACE("%s: Full divider (p) found is %d\n", __func__, ctx.p);
1067 
1068 	/*
1069 	 * gcc incorrectly analyses that these can be used without being
1070 	 * initialized. To be fair, it's hard to guess.
1071 	 */
1072 	p0 = p1 = p2 = 0;
1073 	skl_wrpll_get_multipliers(ctx.p, &p0, &p1, &p2);
1074 	skl_wrpll_params_populate(wrpll_params, afe_clock, ref_clock,
1075 				  ctx.central_freq, p0, p1, p2);
1076 
1077 	return true;
1078 }
1079 
1080