1 /*
2 * Copyright 2006-2018, Haiku, Inc. All Rights Reserved.
3 * Distributed under the terms of the MIT License.
4 *
5 * Authors:
6 * Axel Dörfler, axeld@pinc-software.de
7 * Alexander von Gluck IV, kallisti5@unixzen.com
8 * Adrien Destugues, pulkomandy@pulkomandy.tk
9 */
10
11
12 #include "pll.h"
13
14 #include <math.h>
15 #include <stdio.h>
16 #include <string.h>
17
18 #include <Debug.h>
19
20 #include <create_display_modes.h>
21 #include <ddc.h>
22 #include <edid.h>
23 #include <validate_display_mode.h>
24
25 #include "accelerant_protos.h"
26 #include "accelerant.h"
27 #include "utility.h"
28
29
30 #undef TRACE
31 #define TRACE_MODE
32 #ifdef TRACE_MODE
33 # define TRACE(x...) _sPrintf("intel_extreme: " x)
34 #else
35 # define TRACE(x...)
36 #endif
37
38 #define ERROR(x...) _sPrintf("intel_extreme: " x)
39 #define CALLED(x...) TRACE("CALLED %s\n", __PRETTY_FUNCTION__)
40
41
42 // PLL limits, taken from i915 DRM driver. However, note that we use the values of
43 // N+2, M1+2 and M2+2 here, the - 2 being applied when we write the values to the registers.
44
45 static pll_limits kLimits85x = {
46 // p, p1, p2, n, m, m1, m2
47 { 4, 2, 2, 4, 96, 20, 8},
48 {128, 33, 4, 18, 140, 28, 18},
49 165000, 908000, 1512000
50 };
51
52 // For Iron Lake, a new set of timings is introduced along with the FDI system,
53 // and carried on to later cards with just one further change (to the P2 cutoff
54 // frequency) in Sandy Bridge.
55
56 static pll_limits kLimits9xxSdvo = {
57 // p, p1, p2, n, m, m1, m2
58 { 5, 1, 5, 3, 70, 10, 5}, // min
59 { 80, 8, 10, 8, 120, 20, 9}, // max
60 200000, 1400000, 2800000
61 };
62
63 static pll_limits kLimits9xxLvds = {
64 // p, p1, p2, n, m, m1, m2
65 { 7, 1, 7, 3, 70, 10, 5}, // min
66 { 98, 8, 14, 8, 120, 20, 9}, // max
67 112000, 1400000, 2800000
68 };
69
// Limits for G45 cards taken from i915 DRM driver, mixed with old setup
// plus tests to accommodate lower resolutions with still correct refresh.
// Note that n here is actually n+2, same applies to m1 and m2.
73
74 static pll_limits kLimitsG4xSdvo = {
75 // p, p1, p2, n, m, m1, m2
76 { 10, 1, 10, 3, 104, 19, 7}, // min
77 { 80, 8, 10, 8, 138, 25, 13}, // max
78 270000, 1750000, 3500000
79 };
80
81 #if 0
82 static pll_limits kLimitsG4xHdmi = {
83 // p, p1, p2, n, m, m1, m2
84 { 5, 1, 5, 3, 104, 18, 7}, // min
85 { 80, 8, 10, 8, 138, 25, 13}, // max
86 165000, 1750000, 3500000
87 };
88 #endif
89
90 static pll_limits kLimitsG4xLvdsSingle = {
91 // p, p1, p2, n, m, m1, m2
92 { 28, 2, 14, 3, 104, 19, 7}, // min
93 {112, 8, 14, 8, 138, 25, 13}, // max
94 0, 1750000, 3500000
95 };
96
97 static pll_limits kLimitsG4xLvdsDual = {
98 // p, p1, p2, n, m, m1, m2
99 { 14, 2, 7, 3, 104, 19, 7}, // min
100 { 42, 6, 7, 8, 138, 25, 13}, // max
101 0, 1750000, 3500000
102 };
103
104 static pll_limits kLimitsIlkDac = {
105 // p, p1, p2, n, m, m1, m2
106 { 5, 1, 5, 3, 79, 14, 7}, // min
107 { 80, 8, 10, 7, 127, 24, 11}, // max
108 225000, 1760000, 3510000
109 };
110
111 static pll_limits kLimitsIlkLvdsSingle = {
112 // p, p1, p2, n, m, m1, m2
113 { 28, 2, 14, 3, 79, 14, 7}, // min
114 {112, 8, 14, 5, 118, 24, 11}, // max
115 225000, 1760000, 3510000
116 };
117
118 static pll_limits kLimitsIlkLvdsDual = {
119 // p, p1, p2, n, m, m1, m2
120 { 14, 2, 7, 3, 79, 14, 7}, // min
121 { 56, 8, 7, 5, 127, 24, 11}, // max
122 225000, 1760000, 3510000
123 };
124
125 // 100Mhz RefClock
126 static pll_limits kLimitsIlkLvdsSingle100 = {
127 // p, p1, p2, n, m, m1, m2
128 { 28, 2, 14, 3, 79, 14, 7}, // min
129 {112, 8, 14, 4, 126, 24, 11}, // max
130 225000, 1760000, 3510000
131 };
132
133 static pll_limits kLimitsIlkLvdsDual100 = {
134 // p, p1, p2, n, m, m1, m2
135 { 14, 2, 7, 3, 79, 14, 7}, // min
136 { 42, 6, 7, 5, 126, 24, 11}, // max
137 225000, 1760000, 3510000
138 };
139
140 // TODO From haswell onwards, a completely different PLL design is used
141 // (intel_gfx-prm-osrc-hsw-display_0.pdf, page 268 for VGA). It uses a "virtual
142 // root frequency" and one just has to set a single divider (integer and
143 // fractional parts), so it makes no sense to reuse the same code and limit
144 // structures there.
145 //
146 // For other display connections, the clock is handled differently, as there is
147 // no need for a precise timing to send things in sync with the display.
// Limit tables for chips that are not handled by this file yet; kept disabled
// for reference. Declared const like the active tables so that they can be
// enabled without further changes.
#if 0
static const pll_limits kLimitsChv = {
	// p, p1, p2,  n,   m, m1, m2
	{  0,  2,  1,  1,  79,  2, 24 << 22},	// min
	{  0,  4, 14,  1, 127,  2, 175 << 22},	// max
	0, 4800000, 6480000
};

static const pll_limits kLimitsVlv = {
	// p, p1, p2,  n,   m, m1, m2
	{  0,  2,  2,  1,  79,  2,  11},	// min
	{  0,  3, 20,  7, 127,  3, 156},	// max
	0, 4000000, 6000000
};

static const pll_limits kLimitsBxt = {
	// p, p1, p2,  n,   m, m1, m2
	{  0,  2,  1,  1,   0,  2, 2 << 22},	// min
	{  0,  4, 20,  1,   0,  2, 255 << 22},	// max
	0, 4800000, 6700000
};
#endif
170
171 static pll_limits kLimitsPinSdvo = {
172 // p, p1, p2, n, m, m1, m2
173 { 5, 1, 5, 3, 2, 0, 0}, // min
174 { 80, 8, 10, 6, 256, 0, 254}, // max
175 200000, 1700000, 3500000
176 };
177
178 static pll_limits kLimitsPinLvds = {
179 // p, p1, p2, n, m, m1, m2
180 { 7, 1, 14, 3, 2, 0, 0}, // min
181 {112, 8, 14, 6, 256, 0, 254}, // max
182 112000, 1700000, 3500000
183 };
184
185
186 static bool
lvds_dual_link(display_timing * current)187 lvds_dual_link(display_timing* current)
188 {
189 float requestedPixelClock = current->pixel_clock / 1000.0f;
190 if (requestedPixelClock > 112.999)
191 return true;
192
193 // TODO: Force dual link on MacBookPro6,2 MacBookPro8,2 MacBookPro9,1
194
195 return ((read32(INTEL_DIGITAL_LVDS_PORT) & LVDS_CLKB_POWER_MASK)
196 == LVDS_CLKB_POWER_UP);
197 }
198
199
200 bool
valid_pll_divisors(pll_divisors * divisors,pll_limits * limits)201 valid_pll_divisors(pll_divisors* divisors, pll_limits* limits)
202 {
203 pll_info &info = gInfo->shared_info->pll_info;
204 uint32 vco = info.reference_frequency * divisors->m / divisors->n;
205 uint32 frequency = vco / divisors->p;
206
207 if (divisors->p < limits->min.p || divisors->p > limits->max.p
208 || divisors->m < limits->min.m || divisors->m > limits->max.m
209 || vco < limits->min_vco || vco > limits->max_vco
210 || frequency < info.min_frequency || frequency > info.max_frequency)
211 return false;
212
213 return true;
214 }
215
216
217 static void
compute_pll_p2(display_timing * current,pll_divisors * divisors,pll_limits * limits,bool isLVDS)218 compute_pll_p2(display_timing* current, pll_divisors* divisors,
219 pll_limits* limits, bool isLVDS)
220 {
221 if (isLVDS) {
222 if (lvds_dual_link(current)) {
223 // fast DAC timing via 2 channels (dual link LVDS)
224 divisors->p2 = limits->min.p2;
225 } else {
226 // slow DAC timing
227 divisors->p2 = limits->max.p2;
228 }
229 } else {
230 if (current->pixel_clock < limits->dot_limit) {
231 // slow DAC timing
232 divisors->p2 = limits->max.p2;
233 } else {
234 // fast DAC timing
235 divisors->p2 = limits->min.p2;
236 }
237 }
238 }
239
240
241 // TODO we can simplify this computation, with the way the dividers are set, we
242 // know that all values in the valid range for M are reachable. M1 allows to
243 // generate any multiple of 5 in the range and M2 allows to reach the 4 next
244 // values. Therefore, we don't need to loop over the range of values for M1 and
245 // M2 separately, we could instead just loop over possible values for M.
246 // For this to work, the logic of this function must be reversed: for a given M,
247 // it should give the resulting M1 and M2 values for programming the registers.
248 static uint32
compute_pll_m(pll_divisors * divisors)249 compute_pll_m(pll_divisors* divisors)
250 {
251 if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_CHV)
252 || gInfo->shared_info->device_type.InGroup(INTEL_GROUP_VLV)) {
253 return divisors->m1 * divisors->m2;
254 }
255
256 // Pineview, m1 is reserved
257 if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_PIN))
258 return divisors->m2;
259
260 return 5 * divisors->m1 + divisors->m2;
261 }
262
263
264 static uint32
compute_pll_p(pll_divisors * divisors)265 compute_pll_p(pll_divisors* divisors)
266 {
267 return divisors->p1 * divisors->p2;
268 }
269
270
271 static void
compute_dpll_g4x(display_timing * current,pll_divisors * divisors,bool isLVDS)272 compute_dpll_g4x(display_timing* current, pll_divisors* divisors, bool isLVDS)
273 {
274 float requestedPixelClock = current->pixel_clock / 1000.0f;
275 float referenceClock
276 = gInfo->shared_info->pll_info.reference_frequency / 1000.0f;
277
278 TRACE("%s: required MHz: %g, reference clock: %g\n", __func__,
279 requestedPixelClock, referenceClock);
280
281 pll_limits limits;
282 if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_G4x)) {
283 // TODO: Pass port type via video_configuration
284 if (isLVDS) {
285 if (lvds_dual_link(current))
286 memcpy(&limits, &kLimitsG4xLvdsDual, sizeof(pll_limits));
287 else
288 memcpy(&limits, &kLimitsG4xLvdsSingle, sizeof(pll_limits));
289 //} else if (type == INTEL_PORT_TYPE_HDMI) {
290 // memcpy(&limits, &kLimitsG4xHdmi, sizeof(pll_limits));
291 } else
292 memcpy(&limits, &kLimitsG4xSdvo, sizeof(pll_limits));
293 } else {
294 // There must be a PCH, so this is ivy bridge or later
295 if (isLVDS) {
296 if (lvds_dual_link(current)) {
297 if (referenceClock == 100.0)
298 memcpy(&limits, &kLimitsIlkLvdsDual100, sizeof(pll_limits));
299 else
300 memcpy(&limits, &kLimitsIlkLvdsDual, sizeof(pll_limits));
301 } else {
302 if (referenceClock == 100.0) {
303 memcpy(&limits, &kLimitsIlkLvdsSingle100,
304 sizeof(pll_limits));
305 } else {
306 memcpy(&limits, &kLimitsIlkLvdsSingle, sizeof(pll_limits));
307 }
308 }
309 } else {
310 memcpy(&limits, &kLimitsIlkDac, sizeof(pll_limits));
311 }
312 }
313
314 compute_pll_p2(current, divisors, &limits, isLVDS);
315
316 TRACE("PLL limits, min: p %" B_PRId32 " (p1 %" B_PRId32 ", "
317 "p2 %" B_PRId32 "), n %" B_PRId32 ", m %" B_PRId32 " "
318 "(m1 %" B_PRId32 ", m2 %" B_PRId32 ")\n", limits.min.p,
319 limits.min.p1, limits.min.p2, limits.min.n, limits.min.m,
320 limits.min.m1, limits.min.m2);
321 TRACE("PLL limits, max: p %" B_PRId32 " (p1 %" B_PRId32 ", "
322 "p2 %" B_PRId32 "), n %" B_PRId32 ", m %" B_PRId32 " "
323 "(m1 %" B_PRId32 ", m2 %" B_PRId32 ")\n", limits.max.p,
324 limits.max.p1, limits.max.p2, limits.max.n, limits.max.m,
325 limits.max.m1, limits.max.m2);
326
327 float best = requestedPixelClock;
328 pll_divisors bestDivisors;
329
330 for (divisors->n = limits.min.n; divisors->n <= limits.max.n;
331 divisors->n++) {
332 for (divisors->m1 = limits.max.m1; divisors->m1 >= limits.min.m1;
333 divisors->m1--) {
334 for (divisors->m2 = limits.max.m2; divisors->m2 >= limits.min.m2;
335 divisors->m2--) {
336 for (divisors->p1 = limits.max.p1;
337 divisors->p1 >= limits.min.p1; divisors->p1--) {
338 divisors->m = compute_pll_m(divisors);
339 divisors->p = compute_pll_p(divisors);
340
341 if (!valid_pll_divisors(divisors, &limits))
342 continue;
343
344 float error = fabs(requestedPixelClock
345 - (referenceClock * divisors->m)
346 / (divisors->n * divisors->p));
347 if (error < best) {
348 best = error;
349 bestDivisors = *divisors;
350
351 if (error == 0)
352 break;
353 }
354 }
355 }
356 }
357 }
358 *divisors = bestDivisors;
359 TRACE("%s: best MHz: %g (error: %g)\n", __func__,
360 (referenceClock * divisors->m) / (divisors->n * divisors->p),
361 best);
362 }
363
364
365 static void
compute_dpll_9xx(display_timing * current,pll_divisors * divisors,bool isLVDS)366 compute_dpll_9xx(display_timing* current, pll_divisors* divisors, bool isLVDS)
367 {
368 float requestedPixelClock = current->pixel_clock / 1000.0f;
369 float referenceClock
370 = gInfo->shared_info->pll_info.reference_frequency / 1000.0f;
371
372 TRACE("%s: required MHz: %g\n", __func__, requestedPixelClock);
373
374 pll_limits limits;
375 if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_PIN)) {
376 if (isLVDS)
377 memcpy(&limits, &kLimitsPinLvds, sizeof(pll_limits));
378 else
379 memcpy(&limits, &kLimitsPinSdvo, sizeof(pll_limits));
380 } else if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_85x)) {
381 memcpy(&limits, &kLimits85x, sizeof(pll_limits));
382 } else {
383 if (isLVDS)
384 memcpy(&limits, &kLimits9xxLvds, sizeof(pll_limits));
385 else
386 memcpy(&limits, &kLimits9xxSdvo, sizeof(pll_limits));
387 }
388
389 compute_pll_p2(current, divisors, &limits, isLVDS);
390
391 TRACE("PLL limits, min: p %" B_PRId32 " (p1 %" B_PRId32 ", "
392 "p2 %" B_PRId32 "), n %" B_PRId32 ", m %" B_PRId32 " "
393 "(m1 %" B_PRId32 ", m2 %" B_PRId32 ")\n", limits.min.p,
394 limits.min.p1, limits.min.p2, limits.min.n, limits.min.m,
395 limits.min.m1, limits.min.m2);
396 TRACE("PLL limits, max: p %" B_PRId32 " (p1 %" B_PRId32 ", "
397 "p2 %" B_PRId32 "), n %" B_PRId32 ", m %" B_PRId32 " "
398 "(m1 %" B_PRId32 ", m2 %" B_PRId32 ")\n", limits.max.p,
399 limits.max.p1, limits.max.p2, limits.max.n, limits.max.m,
400 limits.max.m1, limits.max.m2);
401
402 bool is_pine = gInfo->shared_info->device_type.InGroup(INTEL_GROUP_PIN);
403
404 float best = requestedPixelClock;
405 pll_divisors bestDivisors;
406 memset(&bestDivisors, 0, sizeof(bestDivisors));
407
408 for (divisors->m1 = limits.min.m1; divisors->m1 <= limits.max.m1;
409 divisors->m1++) {
410 for (divisors->m2 = limits.min.m2; divisors->m2 <= limits.max.m2
411 && ((divisors->m2 < divisors->m1) || is_pine); divisors->m2++) {
412 for (divisors->n = limits.min.n; divisors->n <= limits.max.n;
413 divisors->n++) {
414 for (divisors->p1 = limits.min.p1;
415 divisors->p1 <= limits.max.p1; divisors->p1++) {
416 divisors->m = compute_pll_m(divisors);
417 divisors->p = compute_pll_p(divisors);
418
419 if (!valid_pll_divisors(divisors, &limits))
420 continue;
421
422 float error = fabs(requestedPixelClock
423 - (referenceClock * divisors->m)
424 / (divisors->n * divisors->p));
425 if (error < best) {
426 best = error;
427 bestDivisors = *divisors;
428
429 if (error == 0)
430 break;
431 }
432 }
433 }
434 }
435 }
436
437 *divisors = bestDivisors;
438
439 if (best == requestedPixelClock)
440 debugger("No valid PLL configuration found");
441 else {
442 TRACE("%s: best MHz: %g (error: %g)\n", __func__,
443 (referenceClock * divisors->m) / (divisors->n * divisors->p),
444 best);
445 }
446 }
447
448
449 void
compute_pll_divisors(display_timing * current,pll_divisors * divisors,bool isLVDS)450 compute_pll_divisors(display_timing* current, pll_divisors* divisors, bool isLVDS)
451 {
452 if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_G4x)
453 || (gInfo->shared_info->pch_info != INTEL_PCH_NONE)) {
454 compute_dpll_g4x(current, divisors, isLVDS);
455 } else if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_CHV)) {
456 ERROR("%s: TODO: CherryView\n", __func__);
457 } else if (gInfo->shared_info->device_type.InGroup(INTEL_GROUP_VLV)) {
458 ERROR("%s: TODO: VallyView\n", __func__);
459 } else
460 compute_dpll_9xx(current, divisors, isLVDS);
461
462 TRACE("%s: found: p = %" B_PRId32 " (p1 = %" B_PRId32 ", "
463 "p2 = %" B_PRId32 "), n = %" B_PRId32 ", m = %" B_PRId32 " "
464 "(m1 = %" B_PRId32 ", m2 = %" B_PRId32 ")\n", __func__,
465 divisors->p, divisors->p1, divisors->p2, divisors->n,
466 divisors->m, divisors->m1, divisors->m2);
467 }
468
469
470 void
refclk_activate_ilk(bool hasPanel)471 refclk_activate_ilk(bool hasPanel)
472 {
473 CALLED();
474
475 bool wantsSSC;
476 bool hasCK505;
477 if (gInfo->shared_info->pch_info == INTEL_PCH_IBX) {
478 TRACE("%s: Generation 5 graphics\n", __func__);
479 //XXX: This should be == vbt display_clock_mode
480 hasCK505 = false;
481 wantsSSC = hasCK505;
482 } else {
483 if (gInfo->shared_info->device_type.Generation() == 6) {
484 TRACE("%s: Generation 6 graphics\n", __func__);
485 } else {
486 TRACE("%s: Generation 7 graphics\n", __func__);
487 }
488 hasCK505 = false;
489 wantsSSC = true;
490 }
491
492 uint32 clkRef = read32(PCH_DREF_CONTROL);
493 uint32 newRef = clkRef;
494 TRACE("%s: PCH_DREF_CONTROL before: 0x%" B_PRIx32 "\n", __func__, clkRef);
495
496 newRef &= ~DREF_NONSPREAD_SOURCE_MASK;
497
498 if (hasCK505)
499 newRef |= DREF_NONSPREAD_CK505_ENABLE;
500 else
501 newRef |= DREF_NONSPREAD_SOURCE_ENABLE;
502
503 newRef &= ~DREF_SSC_SOURCE_MASK;
504 newRef &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
505 newRef &= ~DREF_SSC1_ENABLE;
506
507 if (newRef == clkRef) {
508 TRACE("%s: No changes to reference clock.\n", __func__);
509 return;
510 }
511
512 if (hasPanel) {
513 newRef &= ~DREF_SSC_SOURCE_MASK;
514 newRef |= DREF_SSC_SOURCE_ENABLE;
515
516 if (wantsSSC)
517 newRef |= DREF_SSC1_ENABLE;
518 else
519 newRef &= ~DREF_SSC1_ENABLE;
520
521 // Power up SSC before enabling outputs
522 write32(PCH_DREF_CONTROL, newRef);
523 read32(PCH_DREF_CONTROL);
524 TRACE("%s: PCH_DREF_CONTROL after SSC on/off: 0x%" B_PRIx32 "\n",
525 __func__, read32(PCH_DREF_CONTROL));
526 spin(200);
527
528 newRef &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
529
530 bool hasEDP = true;
531 if (hasEDP) {
532 if (wantsSSC)
533 newRef |= DREF_CPU_SOURCE_OUTPUT_DOWNSPREAD;
534 else
535 newRef |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD;
536 } else
537 newRef |= DREF_CPU_SOURCE_OUTPUT_DISABLE;
538
539 write32(PCH_DREF_CONTROL, newRef);
540 read32(PCH_DREF_CONTROL);
541 TRACE("%s: PCH_DREF_CONTROL after done: 0x%" B_PRIx32 "\n",
542 __func__, read32(PCH_DREF_CONTROL));
543 spin(200);
544 } else {
545 newRef &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
546 newRef |= DREF_CPU_SOURCE_OUTPUT_DISABLE;
547
548 write32(PCH_DREF_CONTROL, newRef);
549 read32(PCH_DREF_CONTROL);
550 TRACE("%s: PCH_DREF_CONTROL after disable CPU output: 0x%" B_PRIx32 "\n",
551 __func__, read32(PCH_DREF_CONTROL));
552 spin(200);
553
554 if (!wantsSSC) {
555 newRef &= ~DREF_SSC_SOURCE_MASK;
556 newRef |= DREF_SSC_SOURCE_DISABLE;
557 newRef &= ~DREF_SSC1_ENABLE;
558
559 write32(PCH_DREF_CONTROL, newRef);
560 read32(PCH_DREF_CONTROL);
561 TRACE("%s: PCH_DREF_CONTROL after disable SSC: 0x%" B_PRIx32 "\n",
562 __func__, read32(PCH_DREF_CONTROL));
563 spin(200);
564 }
565 }
566 }
567
568
569 //excerpt (plus modifications) from intel_dpll_mgr.c:
570
571 /*
572 * Copyright © 2006-2016 Intel Corporation
573 *
574 * Permission is hereby granted, free of charge, to any person obtaining a
575 * copy of this software and associated documentation files (the "Software"),
576 * to deal in the Software without restriction, including without limitation
577 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
578 * and/or sell copies of the Software, and to permit persons to whom the
579 * Software is furnished to do so, subject to the following conditions:
580 *
581 * The above copyright notice and this permission notice (including the next
582 * paragraph) shall be included in all copies or substantial portions of the
583 * Software.
584 *
585 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
586 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
587 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
588 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
589 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
590 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
591 * DEALINGS IN THE SOFTWARE.
592 */
593
594 #define LC_FREQ 2700
595 #define LC_FREQ_2K (uint64)(LC_FREQ * 2000)
596
597 #define P_MIN 2
598 #define P_MAX 64
599 #define P_INC 2
600
601 /* Constraints for PLL good behavior */
602 #define REF_MIN 48
603 #define REF_MAX 400
604 #define VCO_MIN 2400
605 #define VCO_MAX 4800
606
AbsSubtr64(uint64 nr1,uint64 nr2)607 static uint64 AbsSubtr64(uint64 nr1, uint64 nr2)
608 {
609 if (nr1 >= nr2) {
610 return nr1 - nr2;
611 } else {
612 return nr2 - nr1;
613 }
614 }
615
/* One WRPLL candidate: post divider P plus the doubled N and R values
 * (n2, r2) tried by hsw_ddi_calculate_wrpll(). p == 0 marks "no candidate
 * yet". */
struct hsw_wrpll_rnp {
	unsigned p;
	unsigned n2;
	unsigned r2;
};
619
/* Returns the deviation budget that hsw_wrpll_update_rnp() applies for the
 * given clock. Clocks listed in the table get their individual budget, all
 * other clocks get 1000. */
static unsigned hsw_wrpll_get_budget_for_freq(int clock)
{
	static const struct {
		int clock;
		unsigned budget;
	} kBudgets[] = {
		{ 25175000, 0 }, { 25200000, 0 }, { 27000000, 0 },
		{ 27027000, 0 }, { 37762500, 0 }, { 37800000, 0 },
		{ 40500000, 0 }, { 40541000, 0 }, { 54000000, 0 },
		{ 54054000, 0 }, { 59341000, 0 }, { 59400000, 0 },
		{ 72000000, 0 }, { 74176000, 0 }, { 74250000, 0 },
		{ 81000000, 0 }, { 81081000, 0 }, { 89012000, 0 },
		{ 89100000, 0 }, { 108000000, 0 }, { 108108000, 0 },
		{ 111264000, 0 }, { 111375000, 0 }, { 148352000, 0 },
		{ 148500000, 0 }, { 162000000, 0 }, { 162162000, 0 },
		{ 222525000, 0 }, { 222750000, 0 }, { 296703000, 0 },
		{ 297000000, 0 },

		{ 233500000, 1500 }, { 245250000, 1500 }, { 247750000, 1500 },
		{ 253250000, 1500 }, { 298000000, 1500 },

		{ 169128000, 2000 }, { 169500000, 2000 }, { 179500000, 2000 },
		{ 202000000, 2000 },

		{ 256250000, 4000 }, { 262500000, 4000 }, { 270000000, 4000 },
		{ 272500000, 4000 }, { 273750000, 4000 }, { 280750000, 4000 },
		{ 281250000, 4000 }, { 286000000, 4000 }, { 291750000, 4000 },

		{ 267250000, 5000 }, { 268500000, 5000 }
	};
	const unsigned count = sizeof(kBudgets) / sizeof(kBudgets[0]);
	unsigned index;

	for (index = 0; index < count; index++) {
		if (kBudgets[index].clock == clock)
			return kBudgets[index].budget;
	}

	/* not a special clock */
	return 1000;
}
693
hsw_wrpll_update_rnp(uint64 freq2k,unsigned int budget,unsigned int r2,unsigned int n2,unsigned int p,struct hsw_wrpll_rnp * best)694 static void hsw_wrpll_update_rnp(uint64 freq2k, unsigned int budget,
695 unsigned int r2, unsigned int n2,
696 unsigned int p,
697 struct hsw_wrpll_rnp *best)
698 {
699 uint64 a, b, c, d, diff, diff_best;
700
701 /* No best (r,n,p) yet */
702 if (best->p == 0) {
703 best->p = p;
704 best->n2 = n2;
705 best->r2 = r2;
706 return;
707 }
708
709 /*
710 * Output clock is (LC_FREQ_2K / 2000) * N / (P * R), which compares to
711 * freq2k.
712 *
713 * delta = 1e6 *
714 * abs(freq2k - (LC_FREQ_2K * n2/(p * r2))) /
715 * freq2k;
716 *
717 * and we would like delta <= budget.
718 *
719 * If the discrepancy is above the PPM-based budget, always prefer to
720 * improve upon the previous solution. However, if you're within the
721 * budget, try to maximize Ref * VCO, that is N / (P * R^2).
722 */
723 a = freq2k * budget * p * r2;
724 b = freq2k * budget * best->p * best->r2;
725 diff = AbsSubtr64((uint64)freq2k * p * r2, LC_FREQ_2K * n2);
726 diff_best = AbsSubtr64((uint64)freq2k * best->p * best->r2,
727 LC_FREQ_2K * best->n2);
728 c = 1000000 * diff;
729 d = 1000000 * diff_best;
730
731 if (a < c && b < d) {
732 /* If both are above the budget, pick the closer */
733 if (best->p * best->r2 * diff < p * r2 * diff_best) {
734 best->p = p;
735 best->n2 = n2;
736 best->r2 = r2;
737 }
738 } else if (a >= c && b < d) {
739 /* If A is below the threshold but B is above it? Update. */
740 best->p = p;
741 best->n2 = n2;
742 best->r2 = r2;
743 } else if (a >= c && b >= d) {
744 /* Both are below the limit, so pick the higher n2/(r2*r2) */
745 if (n2 * best->r2 * best->r2 > best->n2 * r2 * r2) {
746 best->p = p;
747 best->n2 = n2;
748 best->r2 = r2;
749 }
750 }
751 /* Otherwise a < c && b >= d, do nothing */
752 }
753
754 void
hsw_ddi_calculate_wrpll(int clock,unsigned * r2_out,unsigned * n2_out,unsigned * p_out)755 hsw_ddi_calculate_wrpll(int clock /* in Hz */,
756 unsigned *r2_out, unsigned *n2_out, unsigned *p_out)
757 {
758 uint64 freq2k;
759 unsigned p, n2, r2;
760 struct hsw_wrpll_rnp best = { 0, 0, 0 };
761 unsigned budget;
762
763 freq2k = clock / 100;
764
765 budget = hsw_wrpll_get_budget_for_freq(clock);
766
767 /* Special case handling for 540 pixel clock: bypass WR PLL entirely
768 * and directly pass the LC PLL to it. */
769 if (freq2k == 5400000) {
770 *n2_out = 2;
771 *p_out = 1;
772 *r2_out = 2;
773 return;
774 }
775
776 /*
777 * Ref = LC_FREQ / R, where Ref is the actual reference input seen by
778 * the WR PLL.
779 *
780 * We want R so that REF_MIN <= Ref <= REF_MAX.
781 * Injecting R2 = 2 * R gives:
782 * REF_MAX * r2 > LC_FREQ * 2 and
783 * REF_MIN * r2 < LC_FREQ * 2
784 *
785 * Which means the desired boundaries for r2 are:
786 * LC_FREQ * 2 / REF_MAX < r2 < LC_FREQ * 2 / REF_MIN
787 *
788 */
789 for (r2 = LC_FREQ * 2 / REF_MAX + 1;
790 r2 <= LC_FREQ * 2 / REF_MIN;
791 r2++) {
792
793 /*
794 * VCO = N * Ref, that is: VCO = N * LC_FREQ / R
795 *
796 * Once again we want VCO_MIN <= VCO <= VCO_MAX.
797 * Injecting R2 = 2 * R and N2 = 2 * N, we get:
798 * VCO_MAX * r2 > n2 * LC_FREQ and
799 * VCO_MIN * r2 < n2 * LC_FREQ)
800 *
801 * Which means the desired boundaries for n2 are:
802 * VCO_MIN * r2 / LC_FREQ < n2 < VCO_MAX * r2 / LC_FREQ
803 */
804 for (n2 = VCO_MIN * r2 / LC_FREQ + 1;
805 n2 <= VCO_MAX * r2 / LC_FREQ;
806 n2++) {
807
808 for (p = P_MIN; p <= P_MAX; p += P_INC)
809 hsw_wrpll_update_rnp(freq2k, budget,
810 r2, n2, p, &best);
811 }
812 }
813
814 *n2_out = best.n2;
815 *p_out = best.p;
816 *r2_out = best.r2;
817 }
818
819 struct skl_wrpll_context {
820 uint64 min_deviation; /* current minimal deviation */
821 uint64 central_freq; /* chosen central freq */
822 uint64 dco_freq; /* chosen dco freq */
823 unsigned int p; /* chosen divider */
824 };
825
826 /* DCO freq must be within +1%/-6% of the DCO central freq */
827 #define SKL_DCO_MAX_PDEVIATION 100
828 #define SKL_DCO_MAX_NDEVIATION 600
829
skl_wrpll_try_divider(struct skl_wrpll_context * ctx,uint64 central_freq,uint64 dco_freq,unsigned int divider)830 static void skl_wrpll_try_divider(struct skl_wrpll_context *ctx,
831 uint64 central_freq,
832 uint64 dco_freq,
833 unsigned int divider)
834 {
835 uint64 deviation;
836
837 deviation = ((uint64)10000 * AbsSubtr64(dco_freq, central_freq)
838 / central_freq);
839
840 /* positive deviation */
841 if (dco_freq >= central_freq) {
842 if (deviation < SKL_DCO_MAX_PDEVIATION &&
843 deviation < ctx->min_deviation) {
844 ctx->min_deviation = deviation;
845 ctx->central_freq = central_freq;
846 ctx->dco_freq = dco_freq;
847 ctx->p = divider;
848
849 TRACE("%s: DCO central frequency %" B_PRIu64 "Hz\n", __func__, central_freq);
850 TRACE("%s: DCO frequency %" B_PRIu64 "Hz\n", __func__, dco_freq);
851 TRACE("%s: positive offset accepted, deviation %" B_PRIu64 "\n",
852 __func__, deviation);
853 }
854 /* negative deviation */
855 } else if (deviation < SKL_DCO_MAX_NDEVIATION &&
856 deviation < ctx->min_deviation) {
857 ctx->min_deviation = deviation;
858 ctx->central_freq = central_freq;
859 ctx->dco_freq = dco_freq;
860 ctx->p = divider;
861
862 TRACE("%s: DCO central frequency %" B_PRIu64 "Hz\n", __func__, central_freq);
863 TRACE("%s: DCO frequency %" B_PRIu64 "Hz\n", __func__, dco_freq);
864 TRACE("%s: negative offset accepted, deviation %" B_PRIu64 "\n",
865 __func__, deviation);
866 }
867 }
868
/*
 * Splits the overall divider p into the three factors p0, p1 and p2 so that
 * p0 * p1 * p2 == p.
 *
 * Supported odd dividers are 3, 5, 7, 9, 15, 21 and 35. For dividers that
 * cannot be split, the outputs are left untouched, so callers should
 * initialize them.
 */
static void skl_wrpll_get_multipliers(unsigned int p,
	unsigned int *p0 /* out */,
	unsigned int *p1 /* out */,
	unsigned int *p2 /* out */)
{
	static const unsigned int kEvenFactors[] = { 2, 3, 7 };
	unsigned int half;
	unsigned int index;

	if (p % 2 != 0) {
		/* odd dividers */
		switch (p) {
		case 3:
		case 9:
			*p0 = 3;
			*p1 = 1;
			*p2 = p / 3;
			break;
		case 5:
		case 7:
			*p0 = p;
			*p1 = 1;
			*p2 = 1;
			break;
		case 15:
			*p0 = 3;
			*p1 = 1;
			*p2 = 5;
			break;
		case 21:
			*p0 = 7;
			*p1 = 1;
			*p2 = 3;
			break;
		case 35:
			*p0 = 7;
			*p1 = 1;
			*p2 = 5;
			break;
		default:
			break;
		}
		return;
	}

	/* even dividers */
	half = p / 2;
	if (half == 1 || half == 2 || half == 3 || half == 5) {
		*p0 = 2;
		*p1 = 1;
		*p2 = half;
		return;
	}

	/* the first of 2, 3, 7 that divides half becomes p0 */
	for (index = 0; index < sizeof(kEvenFactors) / sizeof(kEvenFactors[0]);
			index++) {
		if (half % kEvenFactors[index] == 0) {
			*p0 = kEvenFactors[index];
			*p1 = half / kEvenFactors[index];
			*p2 = 2;
			return;
		}
	}
}
917
skl_wrpll_context_init(struct skl_wrpll_context * ctx)918 static void skl_wrpll_context_init(struct skl_wrpll_context *ctx)
919 {
920 memset(ctx, 0, sizeof(*ctx));
921 ctx->min_deviation = UINT64_MAX;
922 }
923
skl_wrpll_params_populate(struct skl_wrpll_params * params,uint64 afe_clock,int ref_clock,uint64 central_freq,uint32 p0,uint32 p1,uint32 p2)924 static void skl_wrpll_params_populate(struct skl_wrpll_params *params,
925 uint64 afe_clock,
926 int ref_clock,
927 uint64 central_freq,
928 uint32 p0, uint32 p1, uint32 p2)
929 {
930 uint64 dco_freq;
931
932 switch (central_freq) {
933 case 9600000000ULL:
934 params->central_freq = 0;
935 break;
936 case 9000000000ULL:
937 params->central_freq = 1;
938 break;
939 case 8400000000ULL:
940 params->central_freq = 3;
941 }
942
943 switch (p0) {
944 case 1:
945 params->pdiv = 0;
946 break;
947 case 2:
948 params->pdiv = 1;
949 break;
950 case 3:
951 params->pdiv = 2;
952 break;
953 case 7:
954 params->pdiv = 4;
955 break;
956 default:
957 TRACE("%s: Incorrect PDiv\n", __func__);
958 }
959
960 switch (p2) {
961 case 5:
962 params->kdiv = 0;
963 break;
964 case 2:
965 params->kdiv = 1;
966 break;
967 case 3:
968 params->kdiv = 2;
969 break;
970 case 1:
971 params->kdiv = 3;
972 break;
973 default:
974 TRACE("%s: Incorrect KDiv\n", __func__);
975 }
976
977 params->qdiv_ratio = p1;
978 params->qdiv_mode = (params->qdiv_ratio == 1) ? 0 : 1;
979
980 dco_freq = p0 * p1 * p2 * afe_clock;
981 TRACE("%s: AFE frequency %" B_PRIu64 "Hz\n", __func__, afe_clock);
982 TRACE("%s: p0: %" B_PRIu32 ", p1: %" B_PRIu32 ", p2: %" B_PRIu32 "\n",
983 __func__, p0,p1,p2);
984 TRACE("%s: DCO frequency %" B_PRIu64 "Hz\n", __func__, dco_freq);
985
986 /*
987 * Intermediate values are in Hz.
988 * Divide by MHz to match bsepc
989 */
990 params->dco_integer = (uint64)dco_freq / ((uint64)ref_clock * 1000);
991 params->dco_fraction = (
992 (uint64)dco_freq / ((uint64)ref_clock / 1000) -
993 (uint64)params->dco_integer * 1000000) * 0x8000 /
994 1000000;
995
996 TRACE("%s: Reference clock: %gMhz\n", __func__, ref_clock / 1000.0f);
997 TRACE("%s: DCO integer %" B_PRIu32 "\n", __func__, params->dco_integer);
998 TRACE("%s: DCO fraction 0x%" B_PRIx32 "\n", __func__, params->dco_fraction);
999 }
1000
1001 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
1002
1003 bool
skl_ddi_calculate_wrpll(int clock,int ref_clock,struct skl_wrpll_params * wrpll_params)1004 skl_ddi_calculate_wrpll(int clock /* in Hz */,
1005 int ref_clock,
1006 struct skl_wrpll_params *wrpll_params)
1007 {
1008 uint64 afe_clock = (uint64) clock * 5; /* AFE Clock is 5x Pixel clock */
1009 uint64 dco_central_freq[3] = { 8400000000ULL,
1010 9000000000ULL,
1011 9600000000ULL };
1012 static const int even_dividers[] = { 4, 6, 8, 10, 12, 14, 16, 18, 20,
1013 24, 28, 30, 32, 36, 40, 42, 44,
1014 48, 52, 54, 56, 60, 64, 66, 68,
1015 70, 72, 76, 78, 80, 84, 88, 90,
1016 92, 96, 98 };
1017 static const int odd_dividers[] = { 3, 5, 7, 9, 15, 21, 35 };
1018 static const struct {
1019 const int *list;
1020 unsigned int n_dividers;
1021 } dividers[] = {
1022 { even_dividers, ARRAY_SIZE(even_dividers) },
1023 { odd_dividers, ARRAY_SIZE(odd_dividers) },
1024 };
1025 struct skl_wrpll_context ctx;
1026 unsigned int dco, d, i;
1027 unsigned int p0, p1, p2;
1028
1029 skl_wrpll_context_init(&ctx);
1030
1031 for (d = 0; d < ARRAY_SIZE(dividers); d++) {
1032 for (dco = 0; dco < ARRAY_SIZE(dco_central_freq); dco++) {
1033 for (i = 0; i < dividers[d].n_dividers; i++) {
1034 unsigned int p = dividers[d].list[i];
1035 uint64 dco_freq = p * afe_clock;
1036
1037 skl_wrpll_try_divider(&ctx,
1038 dco_central_freq[dco],
1039 dco_freq,
1040 p);
1041 /*
1042 * Skip the remaining dividers if we're sure to
1043 * have found the definitive divider, we can't
1044 * improve a 0 deviation.
1045 */
1046 if (ctx.min_deviation == 0)
1047 goto skip_remaining_dividers;
1048 }
1049 }
1050
1051 skip_remaining_dividers:
1052 /*
1053 * If a solution is found with an even divider, prefer
1054 * this one.
1055 */
1056 if (d == 0 && ctx.p)
1057 break;
1058 }
1059
1060 if (!ctx.p) {
1061 TRACE("%s: No valid divider found for %dHz\n", __func__, clock);
1062 return false;
1063 }
1064 TRACE("%s: Full divider (p) found is %d\n", __func__, ctx.p);
1065
1066 /*
1067 * gcc incorrectly analyses that these can be used without being
1068 * initialized. To be fair, it's hard to guess.
1069 */
1070 p0 = p1 = p2 = 0;
1071 skl_wrpll_get_multipliers(ctx.p, &p0, &p1, &p2);
1072 skl_wrpll_params_populate(wrpll_params, afe_clock, ref_clock,
1073 ctx.central_freq, p0, p1, p2);
1074
1075 return true;
1076 }
1077
1078