1 /* Nvidia TNT and GeForce Back End Scaler functions */
2 /* Written by Rudolf Cornelissen 05/2002-5/2009 */
3
4 #define MODULE_BIT 0x00000200
5
6 #include "nv_std.h"
7
8 typedef struct move_overlay_info move_overlay_info;
9
10 struct move_overlay_info
11 {
12 uint32 hcoordv; /* left and right edges of video output window */
13 uint32 vcoordv; /* top and bottom edges of video output window */
14 uint32 hsrcstv; /* horizontal source start in source buffer (clipping) */
15 uint32 v1srcstv; /* vertical source start in source buffer (clipping) */
16 uintptr_t a1orgv; /* alternate source clipping via startadress of source buffer */
17 };
18
19 static void nv_bes_calc_move_overlay(move_overlay_info *moi);
20 static void nv_bes_program_move_overlay(move_overlay_info moi);
21
22 /* move the overlay output window in virtualscreens */
23 /* Note:
24 * si->dm.h_display_start and si->dm.v_display_start determine where the new
25 * output window is located! */
nv_bes_move_overlay()26 void nv_bes_move_overlay()
27 {
28 move_overlay_info moi;
29
30 /* abort if overlay is not active */
31 if (!si->overlay.active) return;
32
33 nv_bes_calc_move_overlay(&moi);
34 nv_bes_program_move_overlay(moi);
35 }
36
nv_bes_calc_move_overlay(move_overlay_info * moi)37 static void nv_bes_calc_move_overlay(move_overlay_info *moi)
38 {
39 /* misc used variables */
40 uint16 temp1, temp2;
41 /* visible screen window in virtual workspaces */
42 uint16 crtc_hstart, crtc_vstart, crtc_hend, crtc_vend;
43
44 /* do 'overlay follow head' in dualhead modes on dualhead cards */
45 if (si->ps.secondary_head)
46 {
47 switch (si->dm.flags & DUALHEAD_BITS)
48 {
49 case DUALHEAD_ON:
50 case DUALHEAD_SWITCH:
51 if ((si->overlay.ow.h_start + (si->overlay.ow.width / 2)) <
52 (si->dm.h_display_start + si->dm.timing.h_display))
53 nv_bes_to_crtc(si->crtc_switch_mode);
54 else
55 nv_bes_to_crtc(!si->crtc_switch_mode);
56 break;
57 default:
58 nv_bes_to_crtc(si->crtc_switch_mode);
59 break;
60 }
61 }
62
63 /* the BES does not respect virtual_workspaces, but adheres to CRTC
64 * constraints only */
65 crtc_hstart = si->dm.h_display_start;
66 /* make dualhead stretch and switch mode work while we're at it.. */
67 if (si->overlay.crtc)
68 {
69 crtc_hstart += si->dm.timing.h_display;
70 }
71
72 /* horizontal end is the first position beyond the displayed range on the CRTC */
73 crtc_hend = crtc_hstart + si->dm.timing.h_display;
74 crtc_vstart = si->dm.v_display_start;
75 /* vertical end is the first position beyond the displayed range on the CRTC */
76 crtc_vend = crtc_vstart + si->dm.timing.v_display;
77
78
79 /****************************************
80 *** setup all edges of output window ***
81 ****************************************/
82
83 /* setup left and right edges of output window */
84 moi->hcoordv = 0;
85 /* left edge coordinate of output window, must be inside desktop */
86 /* clipping on the left side */
87 if (si->overlay.ow.h_start < crtc_hstart)
88 {
89 temp1 = 0;
90 }
91 else
92 {
93 /* clipping on the right side */
94 if (si->overlay.ow.h_start >= (crtc_hend - 1))
95 {
96 /* width < 2 is not allowed */
97 temp1 = (crtc_hend - crtc_hstart - 2) & 0x7ff;
98 }
99 else
100 /* no clipping here */
101 {
102 temp1 = (si->overlay.ow.h_start - crtc_hstart) & 0x7ff;
103 }
104 }
105 moi->hcoordv |= temp1 << 16;
106 /* right edge coordinate of output window, must be inside desktop */
107 /* width < 2 is not allowed */
108 if (si->overlay.ow.width < 2)
109 {
110 temp2 = (temp1 + 1) & 0x7ff;
111 }
112 else
113 {
114 /* clipping on the right side */
115 if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) > (crtc_hend - 1))
116 {
117 temp2 = (crtc_hend - crtc_hstart - 1) & 0x7ff;
118 }
119 else
120 {
121 /* clipping on the left side */
122 if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
123 {
124 /* width < 2 is not allowed */
125 temp2 = 1;
126 }
127 else
128 /* no clipping here */
129 {
130 temp2 = ((uint16)(si->overlay.ow.h_start + si->overlay.ow.width - crtc_hstart - 1)) & 0x7ff;
131 }
132 }
133 }
134 moi->hcoordv |= temp2 << 0;
135 LOG(4,("Overlay: CRTC left-edge output %d, right-edge output %d\n",temp1, temp2));
136
137 /* setup top and bottom edges of output window */
138 moi->vcoordv = 0;
139 /* top edge coordinate of output window, must be inside desktop */
140 /* clipping on the top side */
141 if (si->overlay.ow.v_start < crtc_vstart)
142 {
143 temp1 = 0;
144 }
145 else
146 {
147 /* clipping on the bottom side */
148 if (si->overlay.ow.v_start >= (crtc_vend - 1))
149 {
150 /* height < 2 is not allowed */
151 temp1 = (crtc_vend - crtc_vstart - 2) & 0x7ff;
152 }
153 else
154 /* no clipping here */
155 {
156 temp1 = (si->overlay.ow.v_start - crtc_vstart) & 0x7ff;
157 }
158 }
159 moi->vcoordv |= temp1 << 16;
160 /* bottom edge coordinate of output window, must be inside desktop */
161 /* height < 2 is not allowed */
162 if (si->overlay.ow.height < 2)
163 {
164 temp2 = (temp1 + 1) & 0x7ff;
165 }
166 else
167 {
168 /* clipping on the bottom side */
169 if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) > (crtc_vend - 1))
170 {
171 temp2 = (crtc_vend - crtc_vstart - 1) & 0x7ff;
172 }
173 else
174 {
175 /* clipping on the top side */
176 if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
177 {
178 /* height < 2 is not allowed */
179 temp2 = 1;
180 }
181 else
182 /* no clipping here */
183 {
184 temp2 = ((uint16)(si->overlay.ow.v_start + si->overlay.ow.height - crtc_vstart - 1)) & 0x7ff;
185 }
186 }
187 }
188 moi->vcoordv |= temp2 << 0;
189 LOG(4,("Overlay: CRTC top-edge output %d, bottom-edge output %d\n",temp1, temp2));
190
191
192 /*********************************
193 *** setup horizontal clipping ***
194 *********************************/
195
196 /* Setup horizontal source start: first (sub)pixel contributing to output picture */
197 /* Note:
198 * The method is to calculate, based on 1:1 scaling, based on the output window.
199 * After this is done, include the scaling factor so you get a value based on the input bitmap.
200 * Then add the left starting position of the bitmap's view (zoom function) to get the final value needed.
201 * Note: The input bitmaps slopspace is automatically excluded from the calculations this way! */
202 /* Note also:
203 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
204 moi->hsrcstv = 0;
205 /* check for destination horizontal clipping at left side */
206 if (si->overlay.ow.h_start < crtc_hstart)
207 {
208 /* check if entire destination picture is clipping left:
209 * (2 pixels will be clamped onscreen at least) */
210 if ((si->overlay.ow.h_start + si->overlay.ow.width - 1) < (crtc_hstart + 1))
211 {
212 /* increase 'first contributing pixel' with 'fixed value': (total dest. width - 2) */
213 moi->hsrcstv += (si->overlay.ow.width - 2);
214 }
215 else
216 {
217 /* increase 'first contributing pixel' with actual number of dest. clipping pixels */
218 moi->hsrcstv += (crtc_hstart - si->overlay.ow.h_start);
219 }
220 LOG(4,("Overlay: clipping left...\n"));
221
222 /* The calculated value is based on scaling = 1x. So we now compensate for scaling.
223 * Note that this also already takes care of aligning the value to the BES register! */
224 moi->hsrcstv *= si->overlay.h_ifactor;
225 }
226 /* take zoom into account */
227 moi->hsrcstv += ((uint32)si->overlay.my_ov.h_start) << 16;
228 /* AND below required by hardware (> 1024 support confirmed on all cards) */
229 moi->hsrcstv &= 0x07fffffc;
230 LOG(4,("Overlay: first hor. (sub)pixel of input bitmap contributing %f\n", moi->hsrcstv / (float)65536));
231
232
233 /*******************************
234 *** setup vertical clipping ***
235 *******************************/
236
237 /* calculate inputbitmap origin adress */
238 moi->a1orgv = (uintptr_t)((vuint32 *)si->overlay.ob.buffer);
239 moi->a1orgv -= (uintptr_t)((vuint32 *)si->framebuffer);
240 LOG(4, ("Overlay: topleft corner of input bitmap (cardRAM offset) $%08x\n", moi->a1orgv));
241
242 /* Setup vertical source start: first (sub)pixel contributing to output picture. */
243 /* Note:
244 * The method is to calculate, based on 1:1 scaling, based on the output window.
245 * 'After' this is done, include the scaling factor so you get a value based on the input bitmap.
246 * Then add the top starting position of the bitmap's view (zoom function) to get the final value needed. */
247 /* Note also:
248 * Even if the scaling factor is clamping we instruct the BES to use the correct source start pos.! */
249
250 moi->v1srcstv = 0;
251 /* check for destination vertical clipping at top side */
252 if (si->overlay.ow.v_start < crtc_vstart)
253 {
254 /* check if entire destination picture is clipping at top:
255 * (2 pixels will be clamped onscreen at least) */
256 if ((si->overlay.ow.v_start + si->overlay.ow.height - 1) < (crtc_vstart + 1))
257 {
258 /* increase 'number of clipping pixels' with 'fixed value':
259 * 'total height - 2' of dest. picture in pixels * inverse scaling factor */
260 moi->v1srcstv = (si->overlay.ow.height - 2) * si->overlay.v_ifactor;
261 /* on pre-NV10 we need to do clipping in the source
262 * bitmap because no seperate clipping registers exist... */
263 if (si->ps.card_arch < NV10A)
264 moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
265 }
266 else
267 {
268 /* increase 'first contributing pixel' with:
269 * number of destination picture clipping pixels * inverse scaling factor */
270 moi->v1srcstv = (crtc_vstart - si->overlay.ow.v_start) * si->overlay.v_ifactor;
271 /* on pre-NV10 we need to do clipping in the source
272 * bitmap because no seperate clipping registers exist... */
273 if (si->ps.card_arch < NV10A)
274 moi->a1orgv += ((moi->v1srcstv >> 16) * si->overlay.ob.bytes_per_row);
275 }
276 LOG(4,("Overlay: clipping at top...\n"));
277 }
278 /* take zoom into account */
279 moi->v1srcstv += (((uint32)si->overlay.my_ov.v_start) << 16);
280 if (si->ps.card_arch < NV10A)
281 {
282 moi->a1orgv += (si->overlay.my_ov.v_start * si->overlay.ob.bytes_per_row);
283 LOG(4,("Overlay: 'contributing part of buffer' origin is (cardRAM offset) $%08x\n", moi->a1orgv));
284 }
285 LOG(4,("Overlay: first vert. (sub)pixel of input bitmap contributing %f\n", moi->v1srcstv / (float)65536));
286
287 /* AND below is probably required by hardware. */
288 /* Buffer A topleft corner of field 1 (origin)(field 1 contains our full frames) */
289 moi->a1orgv &= 0xfffffff0;
290 }
291
/* Write the window position, window size and source clipping held in 'moi'
 * to the BES registers.
 *
 * moi  values as calculated by nv_bes_calc_move_overlay(). It is passed by
 *      value because the pre-NV10 path adjusts its a1orgv copy.
 *
 * No explicit sync to the BES is done here: the card has double buffered
 * registers and a 'trigger if programming complete' feature. */
static void nv_bes_program_move_overlay(move_overlay_info moi)
{
	/* left and top edge as stored in the upper halves of the coordinate values */
	uint32 left_edge = (moi.hcoordv & 0xffff0000) >> 16;
	uint32 top_edge = (moi.vcoordv & 0xffff0000) >> 16;
	/* output window position: top edge in the upper half, left edge in the lower half */
	uint32 dst_position = (moi.vcoordv & 0xffff0000) | left_edge;
	/* output window size: height in the upper half, width in the lower half */
	uint32 dst_size =
		(((moi.vcoordv & 0x0000ffff) - top_edge + 1) << 16) |
		((moi.hcoordv & 0x0000ffff) - left_edge + 1);

	if (si->ps.card_arch >= NV10A)
	{
		/* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */
		BESW(NV10_0SRCREF, ((moi.v1srcstv << 4) & 0xffff0000) | ((moi.hsrcstv >> 12) & 0x0000ffff));
		/* setup output window position */
		BESW(NV10_0DSTREF, dst_position);
		/* setup output window size */
		BESW(NV10_0DSTSIZE, dst_size);
		/* We only use buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */
		/* This also triggers activation of programmed values (double buffered registers feature) */
		BESW(NV10_BUFSEL, 0x00000001);
		return;
	}

	/* < NV10A */

	/* unknown, but needed (otherwise high-res distortions and only half the frames) */
	BESW(NV04_OE_STATE, 0x00000000);
	/* select buffer 0 as active (b16) */
	BESW(NV04_SU_STATE, 0x00000000);
	/* unknown (no effect?) */
	BESW(NV04_RM_STATE, 0x00000000);
	/* setup clipped(!) buffer start address in RAM */
	/* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping
	 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
	/* (program both buffers to prevent sync distortions) */
	/* first include 'pixel precise' left clipping... (top clipping was already included) */
	moi.a1orgv += ((moi.hsrcstv >> 16) * 2);
	/* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
	BESW(NV04_0BUFADR, (moi.a1orgv & ~0x03));
	BESW(NV04_1BUFADR, (moi.a1orgv & ~0x03));
	/* setup output window position */
	BESW(NV04_DSTREF, dst_position);
	/* setup output window size */
	BESW(NV04_DSTSIZE, dst_size);
	/* select buffer 1 as active (b16) */
	BESW(NV04_SU_STATE, 0x00010000);
}
351
nv_bes_to_crtc(bool crtc)352 status_t nv_bes_to_crtc(bool crtc)
353 {
354 if (si->ps.secondary_head)
355 {
356 if (crtc)
357 {
358 LOG(4,("Overlay: switching overlay to CRTC2\n"));
359 /* switch overlay engine to CRTC2 */
360 NV_REG32(NV32_FUNCSEL) &= ~0x00001000;
361 NV_REG32(NV32_2FUNCSEL) |= 0x00001000;
362 si->overlay.crtc = !si->crtc_switch_mode;
363 }
364 else
365 {
366 LOG(4,("Overlay: switching overlay to CRTC1\n"));
367 /* switch overlay engine to CRTC1 */
368 NV_REG32(NV32_2FUNCSEL) &= ~0x00001000;
369 NV_REG32(NV32_FUNCSEL) |= 0x00001000;
370 si->overlay.crtc = si->crtc_switch_mode;
371 }
372 return B_OK;
373 }
374 else
375 {
376 return B_ERROR;
377 }
378 }
379
nv_bes_init()380 status_t nv_bes_init()
381 {
382 if (si->ps.card_arch < NV10A)
383 {
384 /* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */
385 BESW(NV04_INTE, 0x00000000);
386
387 /* setup saturation to be 'neutral' */
388 BESW(NV04_SAT, 0x00000000);
389 /* setup RGB brightness to be 'neutral' */
390 BESW(NV04_RED_AMP, 0x00000069);
391 BESW(NV04_GRN_AMP, 0x0000003e);
392 BESW(NV04_BLU_AMP, 0x00000089);
393
394 /* setup fifo for fetching data */
395 BESW(NV04_FIFOBURL, 0x00000003);
396 BESW(NV04_FIFOTHRS, 0x00000038);
397
398 /* unknown, but needed (registers only have b0 implemented) */
399 /* (program both buffers to prevent sync distortions) */
400 BESW(NV04_0OFFSET, 0x00000000);
401 BESW(NV04_1OFFSET, 0x00000000);
402 }
403 else
404 {
405 /* >= NV10A */
406
407 /* disable overlay ints (b0 = buffer 0, b4 = buffer 1) */
408 BESW(NV10_INTE, 0x00000000);
409 /* shut off GeForce4MX MPEG2 decoder */
410 BESW(DEC_GENCTRL, 0x00000000);
411 /* setup BES memory-range mask */
412 BESW(NV10_0MEMMASK, (si->ps.memory_size - 1));
413 /* unknown, but needed */
414 BESW(NV10_0OFFSET, 0x00000000);
415
416 /* setup brightness, contrast and saturation to be 'neutral' */
417 BESW(NV10_0BRICON, ((0x1000 << 16) | 0x1000));
418 BESW(NV10_0SAT, ((0x0000 << 16) | 0x1000));
419 }
420
421 /* make sure the engine is disabled. */
422 nv_release_bes();
423
424 return B_OK;
425 }
426
nv_configure_bes(const overlay_buffer * ob,const overlay_window * ow,const overlay_view * ov,int offset)427 status_t nv_configure_bes
428 (const overlay_buffer *ob, const overlay_window *ow, const overlay_view *ov, int offset)
429 {
430 /* yuy2 (4:2:2) colorspace calculations */
431
432 /* Note:
433 * in BeOS R5.0.3 and DANO:
434 * 'ow->offset_xxx' is always 0, so not used;
435 * 'ow->width' and 'ow->height' are the output window size: does not change
436 * if window is clipping;
437 * 'ow->h_start' and 'ow->v_start' are the left-top position of the output
438 * window. These values can be negative: this means the window is clipping
439 * at the left or the top of the display, respectively. */
440
441 /* 'ov' is the view in the source bitmap, so which part of the bitmap is actually
442 * displayed on screen. This is used for the 'hardware zoom' function. */
443
444 /* output window position and clipping info for source buffer */
445 move_overlay_info moi;
446 /* calculated BES register values */
447 uint32 hiscalv, viscalv;
448 /* interval representation, used for scaling calculations */
449 uint16 intrep;
450 /* inverse scaling factor, used for source positioning */
451 uint32 ifactor;
452 /* copy of overlay view which has checked valid values */
453 overlay_view my_ov;
454
455
456 /**************************************************************************************
457 *** copy, check and limit if needed the user-specified view into the intput bitmap ***
458 **************************************************************************************/
459 my_ov = *ov;
460 /* check for valid 'coordinates' */
461 if (my_ov.width == 0) my_ov.width++;
462 if (my_ov.height == 0) my_ov.height++;
463 if (my_ov.h_start > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
464 my_ov.h_start = ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1);
465 if (((my_ov.h_start + my_ov.width) - 1) > ((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1))
466 my_ov.width = ((((ob->width - si->overlay.myBufInfo[offset].slopspace) - 1) - my_ov.h_start) + 1);
467 if (my_ov.v_start > (ob->height - 1))
468 my_ov.v_start = (ob->height - 1);
469 if (((my_ov.v_start + my_ov.height) - 1) > (ob->height - 1))
470 my_ov.height = (((ob->height - 1) - my_ov.v_start) + 1);
471
472 LOG(4,("Overlay: inputbuffer view (zoom) left %d, top %d, width %d, height %d\n",
473 my_ov.h_start, my_ov.v_start, my_ov.width, my_ov.height));
474
475 /* save for nv_bes_calc_move_overlay() */
476 si->overlay.ow = *ow;
477 si->overlay.ob = *ob;
478 si->overlay.my_ov = my_ov;
479
480
481 /********************************
482 *** setup horizontal scaling ***
483 ********************************/
484 LOG(4,("Overlay: total input picture width = %d, height = %d\n",
485 (ob->width - si->overlay.myBufInfo[offset].slopspace), ob->height));
486 LOG(4,("Overlay: output picture width = %d, height = %d\n", ow->width, ow->height));
487
488 /* determine interval representation value, taking zoom into account */
489 if (ow->flags & B_OVERLAY_HORIZONTAL_FILTERING)
490 {
491 /* horizontal filtering is ON */
492 if ((my_ov.width == ow->width) | (ow->width < 2))
493 {
494 /* no horizontal scaling used, OR destination width < 2 */
495 intrep = 0;
496 }
497 else
498 {
499 intrep = 1;
500 }
501 }
502 else
503 {
504 /* horizontal filtering is OFF */
505 if ((ow->width < my_ov.width) & (ow->width >= 2))
506 {
507 /* horizontal downscaling used AND destination width >= 2 */
508 intrep = 1;
509 }
510 else
511 {
512 intrep = 0;
513 }
514 }
515 LOG(4,("Overlay: horizontal interval representation value is %d\n",intrep));
516
517 /* calculate inverse horizontal scaling factor, taking zoom into account */
518 /* standard scaling formula: */
519 ifactor = (((uint32)(my_ov.width - intrep)) << 16) / (ow->width - intrep);
520
521 /* correct factor to prevent most-right visible 'line' from distorting */
522 ifactor -= (1 << 2);
523 hiscalv = ifactor;
524 /* save for nv_bes_calc_move_overlay() */
525 si->overlay.h_ifactor = ifactor;
526 LOG(4,("Overlay: horizontal scaling factor is %f\n", (float)65536 / ifactor));
527
528 /* check scaling factor (and modify if needed) to be within scaling limits */
529 /* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */
530 if (hiscalv < 0x00002000)
531 {
532 /* (non-inverse) factor too large, set factor to max. valid value */
533 hiscalv = 0x00002000;
534 LOG(4,("Overlay: horizontal scaling factor too large, clamping at %f\n", (float)65536 / hiscalv));
535 }
536 switch (si->ps.card_arch)
537 {
538 case NV04A:
539 /* Riva128-TNT2 series have a 'downscaling' limit of 1.000489
540 * (16bit register with 0.11 format value) */
541 if (hiscalv > 0x0000ffff)
542 {
543 /* (non-inverse) factor too small, set factor to min. valid value */
544 hiscalv = 0x0000ffff;
545 LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)2048 / (hiscalv >> 5)));
546 }
547 break;
548 case NV30A:
549 case NV40A:
550 /* GeForceFX series and up have a downscaling limit of 0.5 (except NV31!) */
551 if ((hiscalv > (2 << 16)) && (si->ps.card_type != NV31))
552 {
553 /* (non-inverse) factor too small, set factor to min. valid value */
554 hiscalv = (2 << 16);
555 LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
556 }
557 /* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits!
558 * So let it fall through... */
559 if (si->ps.card_type != NV31) break;
560 default:
561 /* the rest has a downscaling limit of 0.125 */
562 if (hiscalv > (8 << 16))
563 {
564 /* (non-inverse) factor too small, set factor to min. valid value */
565 hiscalv = (8 << 16);
566 LOG(4,("Overlay: horizontal scaling factor too small, clamping at %f\n", (float)65536 / hiscalv));
567 }
568 break;
569 }
570 /* AND below is required by hardware */
571 hiscalv &= 0x001ffffc;
572
573
574 /******************************
575 *** setup vertical scaling ***
576 ******************************/
577
578 /* determine interval representation value, taking zoom into account */
579 if (ow->flags & B_OVERLAY_VERTICAL_FILTERING)
580 {
581 /* vertical filtering is ON */
582 if ((my_ov.height == ow->height) | (ow->height < 2))
583 {
584 /* no vertical scaling used, OR destination height < 2 */
585 intrep = 0;
586 }
587 else
588 {
589 intrep = 1;
590 }
591 }
592 else
593 {
594 /* vertical filtering is OFF */
595 if ((ow->height < my_ov.height) & (ow->height >= 2))
596 {
597 /* vertical downscaling used AND destination height >= 2 */
598 intrep = 1;
599 }
600 else
601 {
602 intrep = 0;
603 }
604 }
605 LOG(4,("Overlay: vertical interval representation value is %d\n",intrep));
606
607 /* calculate inverse vertical scaling factor, taking zoom into account */
608 /* standard scaling formula: */
609 ifactor = (((uint32)(my_ov.height - intrep)) << 16) / (ow->height - intrep);
610
611 /* correct factor to prevent lowest visible line from distorting */
612 ifactor -= (1 << 2);
613 LOG(4,("Overlay: vertical scaling factor is %f\n", (float)65536 / ifactor));
614
615 /* preserve ifactor for source positioning calculations later on */
616 viscalv = ifactor;
617 /* save for nv_bes_calc_move_overlay() */
618 si->overlay.v_ifactor = ifactor;
619
620 /* check scaling factor (and modify if needed) to be within scaling limits */
621 /* all cards have a upscaling limit of 8.0 (see official nVidia specsheets) */
622 if (viscalv < 0x00002000)
623 {
624 /* (non-inverse) factor too large, set factor to max. valid value */
625 viscalv = 0x00002000;
626 LOG(4,("Overlay: vertical scaling factor too large, clamping at %f\n", (float)65536 / viscalv));
627 }
628 switch (si->ps.card_arch)
629 {
630 case NV04A:
631 /* Riva128-TNT2 series have a 'downscaling' limit of 1.000489
632 * (16bit register with 0.11 format value) */
633 if (viscalv > 0x0000ffff)
634 {
635 /* (non-inverse) factor too small, set factor to min. valid value */
636 viscalv = 0x0000ffff;
637 LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)2048 / (viscalv >> 5)));
638 }
639 break;
640 case NV30A:
641 case NV40A:
642 /* GeForceFX series and up have a downscaling limit of 0.5 (except NV31!) */
643 if ((viscalv > (2 << 16)) && (si->ps.card_type != NV31))
644 {
645 /* (non-inverse) factor too small, set factor to min. valid value */
646 viscalv = (2 << 16);
647 LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
648 }
649 /* NV31 (confirmed GeForceFX 5600) has NV20A scaling limits!
650 * So let it fall through... */
651 if (si->ps.card_type != NV31) break;
652 default:
653 /* the rest has a downscaling limit of 0.125 */
654 if (viscalv > (8 << 16))
655 {
656 /* (non-inverse) factor too small, set factor to min. valid value */
657 viscalv = (8 << 16);
658 LOG(4,("Overlay: vertical scaling factor too small, clamping at %f\n", (float)65536 / viscalv));
659 }
660 break;
661 }
662 /* AND below is required by hardware */
663 viscalv &= 0x001ffffc;
664
665
666 /********************************************************************************
667 *** setup all edges of output window, setup horizontal and vertical clipping ***
668 ********************************************************************************/
669 nv_bes_calc_move_overlay(&moi);
670
671
672 /*****************************
673 *** log color keying info ***
674 *****************************/
675
676 LOG(4,("Overlay: key_red %d, key_green %d, key_blue %d, key_alpha %d\n",
677 ow->red.value, ow->green.value, ow->blue.value, ow->alpha.value));
678 LOG(4,("Overlay: mask_red %d, mask_green %d, mask_blue %d, mask_alpha %d\n",
679 ow->red.mask, ow->green.mask, ow->blue.mask, ow->alpha.mask));
680
681
682 /*****************
683 *** log flags ***
684 *****************/
685
686 LOG(4,("Overlay: ow->flags is $%08x\n",ow->flags));
687 /* BTW: horizontal and vertical filtering are fixed and turned on for GeForce overlay. */
688
689
690 /*************************************
691 *** sync to BES (Back End Scaler) ***
692 *************************************/
693
694 /* Done in card hardware:
695 * double buffered registers + trigger if programming complete feature. */
696
697
698 /**************************************
699 *** actually program the registers ***
700 **************************************/
701
702 if (si->ps.card_arch < NV10A)
703 {
704 /* unknown, but needed (otherwise high-res distortions and only half the frames */
705 BESW(NV04_OE_STATE, 0x00000000);
706 /* select buffer 0 as active (b16) */
707 BESW(NV04_SU_STATE, 0x00000000);
708 /* unknown (no effect?) */
709 BESW(NV04_RM_STATE, 0x00000000);
710 /* setup clipped(!) buffer startadress in RAM */
711 /* RIVA128 - TNT bes doesn't have clipping registers, so no subpixelprecise clipping
712 * either. We do pixelprecise vertical and 'two pixel' precise horizontal clipping here. */
713 /* (program both buffers to prevent sync distortions) */
714 /* first include 'pixel precise' left clipping... (top clipping was already included) */
715 moi.a1orgv += ((moi.hsrcstv >> 16) * 2);
716 /* we need to step in 4-byte (2 pixel) granularity due to the nature of yuy2 */
717 BESW(NV04_0BUFADR, (moi.a1orgv & ~0x03));
718 BESW(NV04_1BUFADR, (moi.a1orgv & ~0x03));
719 /* setup buffer source pitch including slopspace (in bytes).
720 * Note:
721 * source pitch granularity = 16 pixels on the RIVA128 - TNT (so pre-NV10) bes */
722 /* (program both buffers to prevent sync distortions) */
723 BESW(NV04_0SRCPTCH, (ob->width * 2));
724 BESW(NV04_1SRCPTCH, (ob->width * 2));
725 /* setup output window position */
726 BESW(NV04_DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
727 /* setup output window size */
728 BESW(NV04_DSTSIZE, (
729 (((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
730 ((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
731 ));
732 /* setup horizontal and vertical scaling */
733 BESW(NV04_ISCALVH, (((viscalv << 16) >> 5) | (hiscalv >> 5)));
734 /* enable vertical filtering (b0) */
735 BESW(NV04_CTRL_V, 0x00000001);
736 /* enable horizontal filtering (no effect?) */
737 BESW(NV04_CTRL_H, 0x00000111);
738 /* enable BES (b0), set colorkeying (b4), format yuy2 (b8: 0 = ccir) */
739 if (ow->flags & B_OVERLAY_COLOR_KEY)
740 BESW(NV04_GENCTRL, 0x00000111);
741 else
742 BESW(NV04_GENCTRL, 0x00000101);
743 /* select buffer 1 as active (b16) */
744 BESW(NV04_SU_STATE, 0x00010000);
745
746 /**************************
747 *** setup color keying ***
748 **************************/
749
750 /* setup colorkeying */
751 switch(si->dm.space)
752 {
753 case B_RGB15_LITTLE:
754 BESW(NV04_COLKEY, (
755 ((ow->blue.value & ow->blue.mask) << 0) |
756 ((ow->green.value & ow->green.mask) << 5) |
757 ((ow->red.value & ow->red.mask) << 10) |
758 ((ow->alpha.value & ow->alpha.mask) << 15)
759 ));
760 break;
761 case B_RGB16_LITTLE:
762 BESW(NV04_COLKEY, (
763 ((ow->blue.value & ow->blue.mask) << 0) |
764 ((ow->green.value & ow->green.mask) << 5) |
765 ((ow->red.value & ow->red.mask) << 11)
766 /* this space has no alpha bits */
767 ));
768 break;
769 case B_CMAP8:
770 case B_RGB32_LITTLE:
771 default:
772 BESW(NV04_COLKEY, (
773 ((ow->blue.value & ow->blue.mask) << 0) |
774 ((ow->green.value & ow->green.mask) << 8) |
775 ((ow->red.value & ow->red.mask) << 16) |
776 ((ow->alpha.value & ow->alpha.mask) << 24)
777 ));
778 break;
779 }
780 }
781 else
782 {
783 /* >= NV10A */
784
785 /* setup buffer origin: GeForce uses subpixel precise clipping on left and top! (12.4 values) */
786 BESW(NV10_0SRCREF, ((moi.v1srcstv << 4) & 0xffff0000) | ((moi.hsrcstv >> 12) & 0x0000ffff));
787 /* setup buffersize */
788 //fixme if needed: width must be even officially...
789 BESW(NV10_0SRCSIZE, ((ob->height << 16) | ob->width));
790 /* setup source pitch including slopspace (in bytes),
791 * b16: select YUY2 (0 = YV12), b20: set colorkeying, b24: no iturbt_709 (do iturbt_601) */
792 /* Note:
793 * source pitch granularity = 32 pixels on GeForce cards!! */
794 if (ow->flags & B_OVERLAY_COLOR_KEY)
795 BESW(NV10_0SRCPTCH, (((ob->width * 2) & 0x0000ffff) | (1 << 16) | (1 << 20) | (0 << 24)));
796 else
797 BESW(NV10_0SRCPTCH, (((ob->width * 2) & 0x0000ffff) | (1 << 16) | (0 << 20) | (0 << 24)));
798 /* setup output window position */
799 BESW(NV10_0DSTREF, ((moi.vcoordv & 0xffff0000) | ((moi.hcoordv & 0xffff0000) >> 16)));
800 /* setup output window size */
801 BESW(NV10_0DSTSIZE, (
802 (((moi.vcoordv & 0x0000ffff) - ((moi.vcoordv & 0xffff0000) >> 16) + 1) << 16) |
803 ((moi.hcoordv & 0x0000ffff) - ((moi.hcoordv & 0xffff0000) >> 16) + 1)
804 ));
805 /* setup horizontal scaling */
806 BESW(NV10_0ISCALH, (hiscalv << 4));
807 /* setup vertical scaling */
808 BESW(NV10_0ISCALV, (viscalv << 4));
809 /* setup (unclipped!) buffer startadress in RAM */
810 BESW(NV10_0BUFADR, moi.a1orgv);
811 /* enable BES (b0 = 0) */
812 BESW(NV10_GENCTRL, 0x00000000);
813 /* We only use buffer buffer 0: select it. (0x01 = buffer 0, 0x10 = buffer 1) */
814 /* This also triggers activation of programmed values (double buffered registers feature) */
815 BESW(NV10_BUFSEL, 0x00000001);
816
817 /**************************
818 *** setup color keying ***
819 **************************/
820
821 /* setup colorkeying */
822 switch(si->dm.space)
823 {
824 case B_RGB15_LITTLE:
825 BESW(NV10_COLKEY, (
826 ((ow->blue.value & ow->blue.mask) << 0) |
827 ((ow->green.value & ow->green.mask) << 5) |
828 ((ow->red.value & ow->red.mask) << 10) |
829 ((ow->alpha.value & ow->alpha.mask) << 15)
830 ));
831 break;
832 case B_RGB16_LITTLE:
833 BESW(NV10_COLKEY, (
834 ((ow->blue.value & ow->blue.mask) << 0) |
835 ((ow->green.value & ow->green.mask) << 5) |
836 ((ow->red.value & ow->red.mask) << 11)
837 /* this space has no alpha bits */
838 ));
839 break;
840 case B_CMAP8:
841 case B_RGB32_LITTLE:
842 default:
843 BESW(NV10_COLKEY, (
844 ((ow->blue.value & ow->blue.mask) << 0) |
845 ((ow->green.value & ow->green.mask) << 8) |
846 ((ow->red.value & ow->red.mask) << 16) |
847 ((ow->alpha.value & ow->alpha.mask) << 24)
848 ));
849 break;
850 }
851 }
852
853 /* note that overlay is in use (for nv_bes_move_overlay()) */
854 si->overlay.active = true;
855
856 return B_OK;
857 }
858
nv_release_bes()859 status_t nv_release_bes()
860 {
861 if (si->ps.card_arch < NV10A)
862 {
863 /* setup BES control: disable scaler (b0 = 0) */
864 BESW(NV04_GENCTRL, 0x00000000);
865 }
866 else
867 {
868 /* setup BES control: disable scaler (b0 = 1) */
869 BESW(NV10_GENCTRL, 0x00000001);
870 }
871
872 /* note that overlay is not in use (for nv_bes_move_overlay()) */
873 si->overlay.active = false;
874
875 return B_OK;
876 }
877