1 /* MGA Acceleration functions */ 2 /* Authors: 3 Mark Watson 2/2000, 4 Rudolf Cornelissen 10/2002-1/2004. 5 */ 6 7 #define MODULE_BIT 0x00080000 8 9 #include "mga_std.h" 10 11 /*acceleration notes*/ 12 13 /*functions Be's app_server uses: 14 fill span (horizontal only) 15 fill rectangle (these 2 are very similar) 16 invert rectangle 17 blit 18 */ 19 20 /* needed by MIL 1/2 because of adress linearisation constraints */ 21 #define ACCW_YDSTLEN(dst, len) do { \ 22 if (si->engine.y_lin) { \ 23 ACCW(YDST,((dst)* (si->fbc.bytes_per_row / (si->engine.depth >> 3))) >> 5); \ 24 ACCW(LEN,len); \ 25 } else ACCW(YDSTLEN,((dst)<<16)|(len)); \ 26 } while (0) 27 28 status_t gx00_acc_wait_idle() 29 { 30 /* wait until engine completely idle */ 31 while (ACCR(STATUS) & 0x00010000) 32 { 33 /* snooze a bit so I do not hammer the bus */ 34 snooze (100); 35 } 36 37 return B_OK; 38 } 39 40 /* AFAIK this must be done for every new screenmode. 41 * Engine required init. */ 42 status_t gx00_acc_init() 43 { 44 /* used for convenience: MACCESS is a write only register! */ 45 uint32 maccess = 0x00000000; 46 /* if we were unable to read PINS, we have to assume something (keeping bit6 zero) */ 47 if ((si->ps.card_type >= G450) && (si->ps.pins_status = B_OK)) 48 { 49 /* b7 v5_mem_type = done by Mark Watson. fixme: still confirm! (unknown bits) */ 50 maccess |= ((((uint32)si->ps.v5_mem_type) & 0x80) >> 1); 51 } 52 53 /* preset using hardware adress linearisation */ 54 si->engine.y_lin = 0x00; 55 /* reset depth */ 56 si->engine.depth = 0; 57 58 /* cleanup bitblt */ 59 ACCW(OPMODE,0); 60 61 /* Set the Z origin to the start of FB (otherwise lockup on blits) */ 62 ACCW(ZORG,0); 63 64 /* Set pixel width */ 65 switch(si->dm.space) 66 { 67 case B_CMAP8: 68 ACCW(MACCESS, ((maccess & 0xfffffffc) | 0x00)); 69 si->engine.depth = 8; 70 break; 71 case B_RGB15_LITTLE:case B_RGB16_LITTLE: 72 ACCW(MACCESS, ((maccess & 0xfffffffc) | 0x01)); 73 si->engine.depth = 16; 74 break; 75 case B_RGB32_LITTLE:case B_RGBA32_LITTLE: 76 ACCW(MACCESS, ((maccess & 0xfffffffc) | 0x02)); 77 si->engine.depth = 32; 78 break; 79 default: 80 LOG(8,("ACC: init, invalid bit depth\n")); 81 return B_ERROR; 82 } 83 84 /* setup PITCH: very cardtype specific! */ 85 switch (si->ps.card_type) 86 { 87 case MIL1: 88 switch (si->fbc.bytes_per_row / (si->engine.depth >> 3)) 89 { 90 case 640: 91 case 768: 92 case 800: 93 case 960: 94 case 1024: 95 case 1152: 96 case 1280: 97 case 1600: 98 case 1920: 99 case 2048: 100 /* we are using hardware adress linearisation */ 101 break; 102 default: 103 /* we are using software adress linearisation */ 104 si->engine.y_lin = 0x01; 105 LOG(8,("ACC: using software adress linearisation\n")); 106 break; 107 } 108 ACCW(PITCH, (si->engine.y_lin << 15) | 109 ((si->fbc.bytes_per_row / (si->engine.depth >> 3)) & 0x0FFF)); 110 break; 111 case MIL2: 112 switch (si->fbc.bytes_per_row / (si->engine.depth >> 3)) 113 { 114 case 512: 115 case 640: 116 case 768: 117 case 800: 118 case 832: 119 case 960: 120 case 1024: 121 case 1152: 122 case 1280: 123 case 1600: 124 case 1664: 125 case 1920: 126 case 2048: 127 /* we are using hardware adress linearisation */ 128 break; 129 default: 130 /* we are using software adress linearisation */ 131 si->engine.y_lin = 0x01; 132 LOG(8,("ACC: using software adress linearisation\n")); 133 break; 134 } 135 ACCW(PITCH, (si->engine.y_lin << 15) | 136 ((si->fbc.bytes_per_row / (si->engine.depth >> 3)) & 0x0FFF)); 137 break; 138 case G100: 139 /* always using hardware adress linearisation, because 2D/3D 140 * engine works on every pitch multiple of 32 */ 141 ACCW(PITCH, ((si->fbc.bytes_per_row / (si->engine.depth >> 3)) & 0x0FFF)); 142 break; 143 default: 144 /* G200 and up are equal.. */ 145 /* always using hardware adress linearisation, because 2D/3D 146 * engine works on every pitch multiple of 32 */ 147 ACCW(PITCH, ((si->fbc.bytes_per_row / (si->engine.depth >> 3)) & 0x1FFF)); 148 break; 149 } 150 151 /* disable plane write mask (needed for SDRAM): actual change needed to get it sent to RAM */ 152 ACCW(PLNWT,0x00000000); 153 ACCW(PLNWT,0xffffffff); 154 155 if (si->ps.card_type >= G200) { 156 /*DSTORG - location of active screen in framebuffer*/ 157 ACCW(DSTORG,((uint8*)si->fbc.frame_buffer) - ((uint8*)si->framebuffer)); 158 159 /*SRCORG - init source address - same as dest*/ 160 ACCW(SRCORG,((uint8*)si->fbc.frame_buffer) - ((uint8*)si->framebuffer)); 161 } 162 163 /* init YDSTORG - apsed, if not inited, BitBlts may fails on <= G200 */ 164 si->engine.src_dst = 0; 165 ACCW(YDSTORG, si->engine.src_dst); 166 167 /* <= G100 uses this register as SRCORG/DSTORG replacement, but 168 * MIL 1/2 does not need framebuffer space for the hardcursor! */ 169 if ((si->ps.card_type == G100) && (si->settings.hardcursor)) 170 { 171 switch (si->dm.space) 172 { 173 case B_CMAP8: 174 si->engine.src_dst = 1024 / 1; 175 break; 176 case B_RGB15_LITTLE: 177 case B_RGB16_LITTLE: 178 si->engine.src_dst = 1024 / 2; 179 break; 180 case B_RGB32_LITTLE: 181 si->engine.src_dst = 1024 / 4; 182 break; 183 default: 184 LOG(8,("ACC: G100 hardcursor not supported for current colorspace\n")); 185 return B_ERROR; 186 } 187 } 188 ACCW(YDSTORG, si->engine.src_dst); 189 190 /* clipping */ 191 /* i.e. highest and lowest X pixel adresses */ 192 ACCW(CXBNDRY,(((si->fbc.bytes_per_row / (si->engine.depth >> 3)) - 1) << 16) | (0)); 193 194 /* Y pixel addresses must be linear */ 195 /* lowest adress */ 196 ACCW(YTOP, 0 + si->engine.src_dst); 197 /* highest adress */ 198 ACCW(YBOT,((si->dm.virtual_height - 1) * 199 (si->fbc.bytes_per_row / (si->engine.depth >> 3))) + si->engine.src_dst); 200 201 return B_OK; 202 } 203 204 /* screen to screen blit - i.e. move windows around. 205 * Engine function bitblit, paragraph 4.5.7.2 */ 206 status_t gx00_acc_blit(uint16 xs,uint16 ys,uint16 xd,uint16 yd,uint16 w,uint16 h) 207 { 208 uint32 t_start,t_end,offset; 209 uint32 b_start,b_end; 210 211 /*find where the top,bottom and offset are*/ 212 offset = (si->fbc.bytes_per_row / (si->engine.depth >> 3)); 213 214 t_end = t_start = xs + (offset*ys) + si->engine.src_dst; 215 t_end += w; 216 217 b_end = b_start = xs + (offset*(ys+h)) + si->engine.src_dst; 218 b_end +=w; 219 220 /* sgnzero bit _must_ be '0' before accessing SGN! */ 221 ACCW(DWGCTL,0x00000000); 222 223 /*find which quadrant */ 224 switch((yd>ys)|((xd>xs)<<1)) 225 { 226 case 0: /*L->R,down*/ 227 ACCW(SGN,0); 228 229 ACCW(AR3,t_start); 230 ACCW(AR0,t_end); 231 ACCW(AR5,offset); 232 233 ACCW_YDSTLEN(yd,h+1); 234 break; 235 case 1: /*L->R,up*/ 236 ACCW(SGN,4); 237 238 ACCW(AR3,b_start); 239 ACCW(AR0,b_end); 240 ACCW(AR5,-offset); 241 242 ACCW_YDSTLEN(yd+h,h+1); 243 break; 244 case 2: /*R->L,down*/ 245 ACCW(SGN,1); 246 247 ACCW(AR3,t_end); 248 ACCW(AR0,t_start); 249 ACCW(AR5,offset); 250 251 ACCW_YDSTLEN(yd,h+1); 252 break; 253 case 3: /*R->L,up*/ 254 ACCW(SGN,5); 255 256 ACCW(AR3,b_end); 257 ACCW(AR0,b_start); 258 ACCW(AR5,-offset); 259 260 ACCW_YDSTLEN(yd+h,h+1); 261 break; 262 } 263 ACCW(FXBNDRY,((xd+w)<<16)|xd); 264 265 /*do the blit*/ 266 ACCGO(DWGCTL,0x040C4018); // atype RSTR 267 268 return B_OK; 269 } 270 271 /* screen to screen tranparent blit - not sure what uses this. 272 * Engine function bitblit, paragraph 4.5.7.2 */ 273 status_t gx00_acc_transparent_blit(uint16 xs,uint16 ys,uint16 xd,uint16 yd,uint16 w,uint16 h,uint32 colour) 274 { 275 uint32 t_start,t_end,offset; 276 uint32 b_start,b_end; 277 278 return B_ERROR; 279 280 /*find where the top,bottom and offset are*/ 281 offset = (si->fbc.bytes_per_row / (si->engine.depth >> 3)); 282 283 t_end = t_start = xs + (offset*ys) + si->engine.src_dst; 284 t_end += w; 285 286 b_end = b_start = xs + (offset*(ys+h)) + si->engine.src_dst; 287 b_end +=w; 288 289 /* sgnzero bit _must_ be '0' before accessing SGN! */ 290 ACCW(DWGCTL,0x00000000); 291 292 /*find which quadrant */ 293 switch((yd>ys)|((xd>xs)<<1)) 294 { 295 case 0: /*L->R,down*/ 296 ACCW(SGN,0); 297 298 ACCW(AR3,t_start); 299 ACCW(AR0,t_end); 300 ACCW(AR5,offset); 301 302 ACCW_YDSTLEN(yd,h+1); 303 break; 304 case 1: /*L->R,up*/ 305 ACCW(SGN,4); 306 307 ACCW(AR3,b_start); 308 ACCW(AR0,b_end); 309 ACCW(AR5,-offset); 310 311 ACCW_YDSTLEN(yd+h,h+1); 312 break; 313 case 2: /*R->L,down*/ 314 ACCW(SGN,1); 315 316 ACCW(AR3,t_end); 317 ACCW(AR0,t_start); 318 ACCW(AR5,offset); 319 320 ACCW_YDSTLEN(yd,h+1); 321 break; 322 case 3: /*R->L,up*/ 323 ACCW(SGN,5); 324 325 ACCW(AR3,b_end); 326 ACCW(AR0,b_start); 327 ACCW(AR5,-offset); 328 329 ACCW_YDSTLEN(yd+h,h+1); 330 break; 331 } 332 ACCW(FXBNDRY,((xd+w)<<16)|xd); 333 334 /*do the blit*/ 335 ACCW(FCOL,colour); 336 ACCW(BCOL,0xffffffff); 337 ACCGO(DWGCTL,0x440C4018); // atype RSTR 338 return B_OK; 339 } 340 341 /* rectangle fill. 342 * Engine function rectangle_fill: paragraph 4.5.5.2 */ 343 /*colorIndex,fill_rect_params,count*/ 344 status_t gx00_acc_rectangle(uint32 xs,uint32 xe,uint32 ys,uint32 yl,uint32 col) 345 { 346 /* 347 FXBNDRY - left and right coordinates a 348 YDSTLEN - y start and no of lines a 349 (or YDST and LEN) 350 DWGCTL - atype must be RSTR or BLK a 351 FCOL - foreground colour a 352 */ 353 354 ACCW(FXBNDRY,(xe<<16)|xs); /*set x start and end*/ 355 ACCW_YDSTLEN(ys,yl); /*set y start and length*/ 356 ACCW(FCOL,col); /*set colour*/ 357 358 //acc fixme: checkout blockmode constraints for G100+ (mil: nc?): also add blockmode 359 // for other functions, and use fastblt on MIL1/2 if possible... 360 //or is CMAP8 contraint a non-blockmode contraint? (linearisation problem maybe?) 361 if (si->dm.space==B_CMAP8 || si->ps.sdram) 362 { 363 ACCGO(DWGCTL,0x400C7814); // atype RSTR 364 } 365 else 366 { 367 ACCGO(DWGCTL,0x400C7844); // atype BLK 368 } 369 return B_OK; 370 } 371 372 /* rectangle invert. 373 * Engine function rectangle_fill: paragraph 4.5.5.2 */ 374 /*colorIndex,fill_rect_params,count*/ 375 status_t gx00_acc_rectangle_invert(uint32 xs,uint32 xe,uint32 ys,uint32 yl,uint32 col) 376 { 377 // int i; 378 // uint32 * dma; 379 // uint32 pci; 380 /* 381 FXBNDRY - left and right coordinates a 382 YDSTLEN - y start and no of lines a 383 (or YDST and LEN) 384 DWGCTL - atype must be RSTR or BLK a 385 FCOL - foreground colour a 386 */ 387 388 ACCW(FXBNDRY,(xe<<16)|xs); /*set x start and end*/ 389 ACCW_YDSTLEN(ys,yl); /*set y start and length*/ 390 ACCW(FCOL,col); /*set colour*/ 391 392 /*draw it! top nibble is c is clipping enabled*/ 393 ACCGO(DWGCTL,0x40057814); // atype RSTR 394 395 /*pseudo_dma version!*/ 396 //MGAACC_DWGCTL =0x1C00, 397 //MGAACC_FCOL =0x1C24, 398 //MGAACC_FXBNDRY =0x1C84, 399 //MGAACC_YDSTLEN =0x1C88, 400 // 401 //40,09,21,22 (ordered as registers) 402 403 // dma = (uint32 *)si->pseudo_dma; 404 // *dma++=0x40092221; 405 // *dma++=(xe<<16)|xs; 406 // *dma++=(ys<<16)|yl; 407 // *dma++=col; 408 // *dma++=0x40057814; 409 410 /*real dma version!*/ 411 // dma = (vuint32 *)si->dma_buffer; 412 // *dma++=0x40092221;/*indices*/ 413 // *dma++=(xe<<16)|xs; 414 // *dma++=(ys<<16)|yl; 415 // *dma++=col; 416 // *dma++=0x40057814; 417 418 // pci = si->dma_buffer_pci; 419 // ACCW(PRIMADDRESS,(pci)); 420 // ACCW(PRIMEND,(20+pci)); 421 422 // delay(100); 423 424 return B_OK; 425 } 426 427 /* screen to screen scaled filtered blit - i.e. scale video in memory. 428 * Engine function texture mapping for video, paragraphs 4.5.5.5 - 4.5.5.9 */ 429 status_t gx00_acc_video_blit(uint16 xs,uint16 ys,uint16 ws, uint16 hs, 430 uint16 xd,uint16 yd,uint16 wd,uint16 hd) 431 { 432 //fixme: implement. Used for G450/G550 Desktop TVout... 433 //fixme: see if MIL1 - G200 support this function as well... 434 435 return B_OK; 436 } 437