1 /* MGA Acceleration functions */ 2 /* Authors: 3 Mark Watson 2/2000, 4 Rudolf Cornelissen 10/2002-1/2006. 5 */ 6 7 #define MODULE_BIT 0x00080000 8 9 #include "mga_std.h" 10 11 /*acceleration notes*/ 12 13 /*functions Be's app_server uses: 14 fill span (horizontal only) 15 fill rectangle (these 2 are very similar) 16 invert rectangle 17 blit 18 */ 19 20 /* needed by MIL 1/2 because of adress linearisation constraints */ 21 #define ACCW_YDSTLEN(dst, len) do { \ 22 if (si->engine.y_lin) { \ 23 ACCW(YDST,((dst)* (si->fbc.bytes_per_row / (si->engine.depth >> 3))) >> 5); \ 24 ACCW(LEN,len); \ 25 } else ACCW(YDSTLEN,((dst)<<16)|(len)); \ 26 } while (0) 27 28 status_t gx00_acc_wait_idle() 29 { 30 /* wait until engine completely idle */ 31 while (ACCR(STATUS) & 0x00010000) 32 { 33 /* snooze a bit so I do not hammer the bus */ 34 snooze (100); 35 } 36 37 return B_OK; 38 } 39 40 /* AFAIK this must be done for every new screenmode. 41 * Engine required init. */ 42 status_t gx00_acc_init() 43 { 44 /* used for convenience: MACCESS is a write only register! */ 45 uint32 maccess = 0x00000000; 46 /* if we were unable to read PINS, we have to assume something (keeping bit6 zero) */ 47 if ((si->ps.card_type >= G450) && (si->ps.pins_status == B_OK)) 48 { 49 /* b7 v5_mem_type = done by Mark Watson. fixme: still confirm! (unknown bits) */ 50 maccess |= ((((uint32)si->ps.v5_mem_type) & 0x80) >> 1); 51 } 52 53 /* preset using hardware adress linearisation */ 54 si->engine.y_lin = 0x00; 55 /* reset depth */ 56 si->engine.depth = 0; 57 58 /* cleanup bitblt */ 59 ACCW(OPMODE,0); 60 61 /* Set the Z origin to the start of FB (otherwise lockup on blits) */ 62 ACCW(ZORG,0); 63 64 /* Set pixel width */ 65 switch(si->dm.space) 66 { 67 case B_CMAP8: 68 ACCW(MACCESS, ((maccess & 0xfffffffc) | 0x00)); 69 si->engine.depth = 8; 70 break; 71 case B_RGB15_LITTLE:case B_RGB16_LITTLE: 72 ACCW(MACCESS, ((maccess & 0xfffffffc) | 0x01)); 73 si->engine.depth = 16; 74 break; 75 case B_RGB32_LITTLE:case B_RGBA32_LITTLE: 76 ACCW(MACCESS, ((maccess & 0xfffffffc) | 0x02)); 77 si->engine.depth = 32; 78 break; 79 default: 80 LOG(8,("ACC: init, invalid bit depth\n")); 81 return B_ERROR; 82 } 83 84 /* setup PITCH: very cardtype specific! */ 85 switch (si->ps.card_type) 86 { 87 case MIL1: 88 switch (si->fbc.bytes_per_row / (si->engine.depth >> 3)) 89 { 90 case 640: 91 case 768: 92 case 800: 93 case 960: 94 case 1024: 95 case 1152: 96 case 1280: 97 case 1600: 98 case 1920: 99 case 2048: 100 /* we are using hardware adress linearisation */ 101 break; 102 default: 103 /* we are using software adress linearisation */ 104 si->engine.y_lin = 0x01; 105 LOG(8,("ACC: using software adress linearisation\n")); 106 break; 107 } 108 ACCW(PITCH, (si->engine.y_lin << 15) | 109 ((si->fbc.bytes_per_row / (si->engine.depth >> 3)) & 0x0FFF)); 110 break; 111 case MIL2: 112 switch (si->fbc.bytes_per_row / (si->engine.depth >> 3)) 113 { 114 case 512: 115 case 640: 116 case 768: 117 case 800: 118 case 832: 119 case 960: 120 case 1024: 121 case 1152: 122 case 1280: 123 case 1600: 124 case 1664: 125 case 1920: 126 case 2048: 127 /* we are using hardware adress linearisation */ 128 break; 129 default: 130 /* we are using software adress linearisation */ 131 si->engine.y_lin = 0x01; 132 LOG(8,("ACC: using software adress linearisation\n")); 133 break; 134 } 135 ACCW(PITCH, (si->engine.y_lin << 15) | 136 ((si->fbc.bytes_per_row / (si->engine.depth >> 3)) & 0x0FFF)); 137 break; 138 case G100: 139 /* always using hardware adress linearisation, because 2D/3D 140 * engine works on every pitch multiple of 32 */ 141 ACCW(PITCH, ((si->fbc.bytes_per_row / (si->engine.depth >> 3)) & 0x0FFF)); 142 break; 143 default: 144 /* G200 and up are equal.. */ 145 /* always using hardware adress linearisation, because 2D/3D 146 * engine works on every pitch multiple of 32 */ 147 ACCW(PITCH, ((si->fbc.bytes_per_row / (si->engine.depth >> 3)) & 0x1FFF)); 148 break; 149 } 150 151 /* disable plane write mask (needed for SDRAM): actual change needed to get it sent to RAM */ 152 ACCW(PLNWT,0x00000000); 153 ACCW(PLNWT,0xffffffff); 154 155 if (si->ps.card_type >= G200) { 156 /*DSTORG - location of active screen in framebuffer*/ 157 ACCW(DSTORG,((uint8*)si->fbc.frame_buffer) - ((uint8*)si->framebuffer)); 158 159 /*SRCORG - init source address - same as dest*/ 160 ACCW(SRCORG,((uint8*)si->fbc.frame_buffer) - ((uint8*)si->framebuffer)); 161 } 162 163 /* init YDSTORG - apsed, if not inited, BitBlts may fails on <= G200 */ 164 si->engine.src_dst = 0; 165 ACCW(YDSTORG, si->engine.src_dst); 166 167 /* <= G100 uses this register as SRCORG/DSTORG replacement, but 168 * MIL 1/2 does not need framebuffer space for the hardcursor! */ 169 if ((si->ps.card_type == G100) && (si->settings.hardcursor)) 170 { 171 switch (si->dm.space) 172 { 173 case B_CMAP8: 174 si->engine.src_dst = 1024 / 1; 175 break; 176 case B_RGB15_LITTLE: 177 case B_RGB16_LITTLE: 178 si->engine.src_dst = 1024 / 2; 179 break; 180 case B_RGB32_LITTLE: 181 si->engine.src_dst = 1024 / 4; 182 break; 183 default: 184 LOG(8,("ACC: G100 hardcursor not supported for current colorspace\n")); 185 return B_ERROR; 186 } 187 } 188 ACCW(YDSTORG, si->engine.src_dst); 189 190 /* clipping */ 191 /* i.e. highest and lowest X pixel adresses */ 192 ACCW(CXBNDRY,(((si->fbc.bytes_per_row / (si->engine.depth >> 3)) - 1) << 16) | (0)); 193 194 /* Y pixel addresses must be linear */ 195 /* lowest adress */ 196 ACCW(YTOP, 0 + si->engine.src_dst); 197 /* highest adress */ 198 ACCW(YBOT,((si->dm.virtual_height - 1) * 199 (si->fbc.bytes_per_row / (si->engine.depth >> 3))) + si->engine.src_dst); 200 201 return B_OK; 202 } 203 204 205 /* 206 note: 207 moved acceleration 'top-level' routines to be integrated in the engine: 208 it is costly to call the engine for every single function within a loop! 209 (measured with BeRoMeter 1.2.6: upto 15% speed increase on all CPU's.) 210 */ 211 212 /* screen to screen blit - i.e. move windows around. 213 * Engine function bitblit, paragraph 4.5.7.2 */ 214 void SCREEN_TO_SCREEN_BLIT(engine_token *et, blit_params *list, uint32 count) 215 { 216 uint32 t_start,t_end,offset; 217 uint32 b_start,b_end; 218 int i = 0; 219 220 /* calc offset 'per line' */ 221 offset = (si->fbc.bytes_per_row / (si->engine.depth >> 3)); 222 223 while (count--) 224 { 225 /* find where the top and bottom are */ 226 t_end = t_start = 227 list[i].src_left + (offset * list[i].src_top) + si->engine.src_dst; 228 t_end += list[i].width; 229 230 b_end = b_start = 231 list[i].src_left + (offset * (list[i].src_top + list[i].height)) + si->engine.src_dst; 232 b_end += list[i].width; 233 234 /* sgnzero bit _must_ be '0' before accessing SGN! */ 235 ACCW(DWGCTL, 0x00000000); 236 237 /*find which quadrant */ 238 switch((list[i].dest_top > list[i].src_top) | ((list[i].dest_left > list[i].src_left) << 1)) 239 { 240 case 0: /*L->R,down*/ 241 ACCW(SGN, 0); 242 ACCW(AR3, t_start); 243 ACCW(AR0, t_end); 244 ACCW(AR5, offset); 245 ACCW_YDSTLEN(list[i].dest_top, list[i].height + 1); 246 break; 247 case 1: /*L->R,up*/ 248 ACCW(SGN, 4); 249 ACCW(AR3, b_start); 250 ACCW(AR0, b_end); 251 ACCW(AR5, -offset); 252 ACCW_YDSTLEN(list[i].dest_top + list[i].height, list[i].height + 1); 253 break; 254 case 2: /*R->L,down*/ 255 ACCW(SGN, 1); 256 ACCW(AR3, t_end); 257 ACCW(AR0, t_start); 258 ACCW(AR5, offset); 259 ACCW_YDSTLEN(list[i].dest_top, list[i].height + 1); 260 break; 261 case 3: /*R->L,up*/ 262 ACCW(SGN, 5); 263 ACCW(AR3, b_end); 264 ACCW(AR0, b_start); 265 ACCW(AR5, -offset); 266 ACCW_YDSTLEN(list[i].dest_top + list[i].height, list[i].height + 1); 267 break; 268 } 269 ACCW(FXBNDRY,((list[i].dest_left + list[i].width) << 16) | list[i].dest_left); 270 271 /* start the blit */ 272 ACCGO(DWGCTL, 0x040c4018); // atype RSTR 273 i++; 274 } 275 } 276 277 /* screen to screen tranparent blit - not sure what uses this. 278 * Engine function bitblit, paragraph 4.5.7.2 */ 279 //WARNING: 280 //yet untested function!! 281 void SCREEN_TO_SCREEN_TRANSPARENT_BLIT(engine_token *et, uint32 transparent_colour, blit_params *list, uint32 count) 282 { 283 uint32 t_start,t_end,offset; 284 uint32 b_start,b_end; 285 int i = 0; 286 287 /* calc offset 'per line' */ 288 offset = (si->fbc.bytes_per_row / (si->engine.depth >> 3)); 289 290 while (count--) 291 { 292 /* find where the top and bottom are */ 293 t_end = t_start = 294 list[i].src_left + (offset * list[i].src_top) + si->engine.src_dst; 295 t_end += list[i].width; 296 297 b_end = b_start = 298 list[i].src_left + (offset * (list[i].src_top + list[i].height)) + si->engine.src_dst; 299 b_end += list[i].width; 300 301 /* sgnzero bit _must_ be '0' before accessing SGN! */ 302 ACCW(DWGCTL, 0x00000000); 303 304 /*find which quadrant */ 305 switch((list[i].dest_top > list[i].src_top) | ((list[i].dest_left > list[i].src_left) << 1)) 306 { 307 case 0: /*L->R,down*/ 308 ACCW(SGN, 0); 309 ACCW(AR3, t_start); 310 ACCW(AR0, t_end); 311 ACCW(AR5, offset); 312 ACCW_YDSTLEN(list[i].dest_top, list[i].height + 1); 313 break; 314 case 1: /*L->R,up*/ 315 ACCW(SGN, 4); 316 ACCW(AR3, b_start); 317 ACCW(AR0, b_end); 318 ACCW(AR5, -offset); 319 ACCW_YDSTLEN(list[i].dest_top + list[i].height, list[i].height + 1); 320 break; 321 case 2: /*R->L,down*/ 322 ACCW(SGN, 1); 323 ACCW(AR3, t_end); 324 ACCW(AR0, t_start); 325 ACCW(AR5, offset); 326 ACCW_YDSTLEN(list[i].dest_top, list[i].height + 1); 327 break; 328 case 3: /*R->L,up*/ 329 ACCW(SGN, 5); 330 ACCW(AR3, b_end); 331 ACCW(AR0, b_start); 332 ACCW(AR5, -offset); 333 ACCW_YDSTLEN(list[i].dest_top + list[i].height, list[i].height + 1); 334 break; 335 } 336 ACCW(FXBNDRY,((list[i].dest_left + list[i].width) << 16) | list[i].dest_left); 337 338 /* start the blit */ 339 ACCW(FCOL, transparent_colour); 340 ACCW(BCOL, 0xffffffff); 341 ACCGO(DWGCTL, 0x440c4018); // atype RSTR 342 i++; 343 } 344 } 345 346 /* screen to screen scaled filtered blit - i.e. scale video in memory. 347 * Engine function texture mapping for video, paragraphs 4.5.5.5 - 4.5.5.9 */ 348 //fixme: implement... 349 void SCREEN_TO_SCREEN_SCALED_FILTERED_BLIT(engine_token *et, scaled_blit_params *list, uint32 count) 350 { 351 int i = 0; 352 353 while (count--) 354 { 355 /* 356 list[i].src_left, 357 list[i].src_top, 358 list[i].src_width, 359 list[i].src_height, 360 list[i].dest_left, 361 list[i].dest_top, 362 list[i].dest_width, 363 list[i].dest_height 364 */ 365 i++; 366 } 367 } 368 369 /* rectangle fill. 370 * Engine function rectangle_fill: paragraph 4.5.5.2 */ 371 void FILL_RECTANGLE(engine_token *et, uint32 colorIndex, fill_rect_params *list, uint32 count) 372 { 373 /* 374 FXBNDRY - left and right coordinates a 375 YDSTLEN - y start and no of lines a 376 (or YDST and LEN) 377 DWGCTL - atype must be RSTR or BLK a 378 FCOL - foreground colour a 379 */ 380 int i = 0; 381 382 while (count--) 383 { 384 ACCW(FXBNDRY, (((list[i].right + 1) << 16) | list[i].left)); 385 ACCW_YDSTLEN(list[i].top, ((list[i].bottom - list[i].top) + 1)); 386 ACCW(FCOL, colorIndex); 387 388 /* start the fill */ 389 //acc fixme: checkout blockmode constraints for G100+ (mil: nc?): also add blockmode 390 // for other functions, and use fastblt on MIL1/2 if possible... 391 //or is CMAP8 contraint a non-blockmode contraint? (linearisation problem maybe?) 392 if ((si->dm.space == B_CMAP8) || si->ps.sdram) 393 { 394 ACCGO(DWGCTL, 0x400c7814); // atype RSTR 395 } 396 else 397 { 398 ACCGO(DWGCTL, 0x400c7844); // atype BLK 399 } 400 i++; 401 } 402 } 403 404 /* horizontal span fill. 405 * Engine function rectangle_fill: paragraph 4.5.5.2 */ 406 //(uint32 xs,uint32 xe,uint32 ys,uint32 yl,uint32 col) 407 void FILL_SPAN(engine_token *et, uint32 colorIndex, uint16 *list, uint32 count) 408 { 409 /* 410 FXBNDRY - left and right coordinates a 411 YDSTLEN - y start and no of lines a 412 (or YDST and LEN) 413 DWGCTL - atype must be RSTR or BLK a 414 FCOL - foreground colour a 415 */ 416 int i = 0; 417 418 while (count--) 419 { 420 ACCW(FXBNDRY, ((list[i + 2] + 1) << 16)| list[i + 1]); 421 ACCW_YDSTLEN(list[i], 1); 422 ACCW(FCOL, colorIndex); 423 424 /* start the fill */ 425 //acc fixme: checkout blockmode constraints for G100+ (mil: nc?): also add blockmode 426 // for other functions, and use fastblt on MIL1/2 if possible... 427 //or is CMAP8 contraint a non-blockmode contraint? (linearisation problem maybe?) 428 if ((si->dm.space == B_CMAP8) || si->ps.sdram) 429 { 430 ACCGO(DWGCTL, 0x400c7814); // atype RSTR 431 } 432 else 433 { 434 ACCGO(DWGCTL, 0x400c7844); // atype BLK 435 } 436 i += 3; 437 } 438 } 439 440 /* rectangle invert. 441 * Engine function rectangle_fill: paragraph 4.5.5.2 */ 442 void INVERT_RECTANGLE(engine_token *et, fill_rect_params *list, uint32 count) 443 { 444 /* 445 FXBNDRY - left and right coordinates a 446 YDSTLEN - y start and no of lines a 447 (or YDST and LEN) 448 DWGCTL - atype must be RSTR or BLK a 449 FCOL - foreground colour a 450 */ 451 int i = 0; 452 // uint32 * dma; 453 // uint32 pci; 454 455 while (count--) 456 { 457 ACCW(FXBNDRY, (((list[i].right) + 1) << 16) | list[i].left); 458 ACCW_YDSTLEN(list[i].top, ((list[i].bottom - list[i].top) + 1)); 459 ACCW(FCOL, 0); /* color */ 460 461 /* start the invert (top nibble is c is clipping enabled) */ 462 ACCGO(DWGCTL, 0x40057814); // atype RSTR 463 464 /* pseudo_dma version! */ 465 // MGAACC_DWGCTL =0x1c00, 466 // MGAACC_FCOL =0x1c24, 467 // MGAACC_FXBNDRY =0x1c84, 468 // MGAACC_YDSTLEN =0x1c88, 469 // 470 // 40,09,21,22 (ordered as registers) 471 472 // dma = (uint32 *)si->pseudo_dma; 473 // *dma++= 0x40092221; 474 // *dma++= (((list[i].right) + 1) << 16) | list[i].left; 475 // *dma++= (list[i].top << 16) | ((list[i].bottom - list[i].top) + 1); 476 // *dma++= 0; /* color */ 477 // *dma++= 0x40057814; 478 479 /* real dma version! */ 480 // dma = (vuint32 *)si->dma_buffer; 481 // *dma++= 0x40092221; /* indices */ 482 // *dma++= (((list[i].right) + 1) << 16) | list[i].left; 483 // *dma++= (list[i].top << 16) | ((list[i].bottom - list[i].top) + 1); 484 // *dma++= 0; /* color */ 485 // *dma++= 0x40057814; 486 487 // pci = si->dma_buffer_pci; 488 // ACCW(PRIMADDRESS, (pci)); 489 // ACCW(PRIMEND, (20 + pci)); 490 491 // delay(100); 492 493 i++; 494 } 495 } 496