/* MGA Acceleration functions */ /* Authors: Mark Watson 2/2000, Rudolf Cornelissen 10/2002-1/2006. */ #define MODULE_BIT 0x00080000 #include "mga_std.h" /*acceleration notes*/ /*functions Be's app_server uses: fill span (horizontal only) fill rectangle (these 2 are very similar) invert rectangle blit */ /* needed by MIL 1/2 because of adress linearisation constraints */ #define ACCW_YDSTLEN(dst, len) do { \ if (si->engine.y_lin) { \ ACCW(YDST,((dst)* (si->fbc.bytes_per_row / (si->engine.depth >> 3))) >> 5); \ ACCW(LEN,len); \ } else ACCW(YDSTLEN,((dst)<<16)|(len)); \ } while (0) status_t gx00_acc_wait_idle() { /* wait until engine completely idle */ while (ACCR(STATUS) & 0x00010000) { /* snooze a bit so I do not hammer the bus */ snooze (100); } return B_OK; } /* AFAIK this must be done for every new screenmode. * Engine required init. */ status_t gx00_acc_init() { /* used for convenience: MACCESS is a write only register! */ uint32 maccess = 0x00000000; /* if we were unable to read PINS, we have to assume something (keeping bit6 zero) */ if ((si->ps.card_type >= G450) && (si->ps.pins_status = B_OK)) { /* b7 v5_mem_type = done by Mark Watson. fixme: still confirm! (unknown bits) */ maccess |= ((((uint32)si->ps.v5_mem_type) & 0x80) >> 1); } /* preset using hardware adress linearisation */ si->engine.y_lin = 0x00; /* reset depth */ si->engine.depth = 0; /* cleanup bitblt */ ACCW(OPMODE,0); /* Set the Z origin to the start of FB (otherwise lockup on blits) */ ACCW(ZORG,0); /* Set pixel width */ switch(si->dm.space) { case B_CMAP8: ACCW(MACCESS, ((maccess & 0xfffffffc) | 0x00)); si->engine.depth = 8; break; case B_RGB15_LITTLE:case B_RGB16_LITTLE: ACCW(MACCESS, ((maccess & 0xfffffffc) | 0x01)); si->engine.depth = 16; break; case B_RGB32_LITTLE:case B_RGBA32_LITTLE: ACCW(MACCESS, ((maccess & 0xfffffffc) | 0x02)); si->engine.depth = 32; break; default: LOG(8,("ACC: init, invalid bit depth\n")); return B_ERROR; } /* setup PITCH: very cardtype specific! */ switch (si->ps.card_type) { case MIL1: switch (si->fbc.bytes_per_row / (si->engine.depth >> 3)) { case 640: case 768: case 800: case 960: case 1024: case 1152: case 1280: case 1600: case 1920: case 2048: /* we are using hardware adress linearisation */ break; default: /* we are using software adress linearisation */ si->engine.y_lin = 0x01; LOG(8,("ACC: using software adress linearisation\n")); break; } ACCW(PITCH, (si->engine.y_lin << 15) | ((si->fbc.bytes_per_row / (si->engine.depth >> 3)) & 0x0FFF)); break; case MIL2: switch (si->fbc.bytes_per_row / (si->engine.depth >> 3)) { case 512: case 640: case 768: case 800: case 832: case 960: case 1024: case 1152: case 1280: case 1600: case 1664: case 1920: case 2048: /* we are using hardware adress linearisation */ break; default: /* we are using software adress linearisation */ si->engine.y_lin = 0x01; LOG(8,("ACC: using software adress linearisation\n")); break; } ACCW(PITCH, (si->engine.y_lin << 15) | ((si->fbc.bytes_per_row / (si->engine.depth >> 3)) & 0x0FFF)); break; case G100: /* always using hardware adress linearisation, because 2D/3D * engine works on every pitch multiple of 32 */ ACCW(PITCH, ((si->fbc.bytes_per_row / (si->engine.depth >> 3)) & 0x0FFF)); break; default: /* G200 and up are equal.. */ /* always using hardware adress linearisation, because 2D/3D * engine works on every pitch multiple of 32 */ ACCW(PITCH, ((si->fbc.bytes_per_row / (si->engine.depth >> 3)) & 0x1FFF)); break; } /* disable plane write mask (needed for SDRAM): actual change needed to get it sent to RAM */ ACCW(PLNWT,0x00000000); ACCW(PLNWT,0xffffffff); if (si->ps.card_type >= G200) { /*DSTORG - location of active screen in framebuffer*/ ACCW(DSTORG,((uint8*)si->fbc.frame_buffer) - ((uint8*)si->framebuffer)); /*SRCORG - init source address - same as dest*/ ACCW(SRCORG,((uint8*)si->fbc.frame_buffer) - ((uint8*)si->framebuffer)); } /* init YDSTORG - apsed, if not inited, BitBlts may fails on <= G200 */ si->engine.src_dst = 0; ACCW(YDSTORG, si->engine.src_dst); /* <= G100 uses this register as SRCORG/DSTORG replacement, but * MIL 1/2 does not need framebuffer space for the hardcursor! */ if ((si->ps.card_type == G100) && (si->settings.hardcursor)) { switch (si->dm.space) { case B_CMAP8: si->engine.src_dst = 1024 / 1; break; case B_RGB15_LITTLE: case B_RGB16_LITTLE: si->engine.src_dst = 1024 / 2; break; case B_RGB32_LITTLE: si->engine.src_dst = 1024 / 4; break; default: LOG(8,("ACC: G100 hardcursor not supported for current colorspace\n")); return B_ERROR; } } ACCW(YDSTORG, si->engine.src_dst); /* clipping */ /* i.e. highest and lowest X pixel adresses */ ACCW(CXBNDRY,(((si->fbc.bytes_per_row / (si->engine.depth >> 3)) - 1) << 16) | (0)); /* Y pixel addresses must be linear */ /* lowest adress */ ACCW(YTOP, 0 + si->engine.src_dst); /* highest adress */ ACCW(YBOT,((si->dm.virtual_height - 1) * (si->fbc.bytes_per_row / (si->engine.depth >> 3))) + si->engine.src_dst); return B_OK; } /* note: moved acceleration 'top-level' routines to be integrated in the engine: it is costly to call the engine for every single function within a loop! (measured with BeRoMeter 1.2.6: upto 15% speed increase on all CPU's.) */ /* screen to screen blit - i.e. move windows around. * Engine function bitblit, paragraph 4.5.7.2 */ void SCREEN_TO_SCREEN_BLIT(engine_token *et, blit_params *list, uint32 count) { uint32 t_start,t_end,offset; uint32 b_start,b_end; int i = 0; /* calc offset 'per line' */ offset = (si->fbc.bytes_per_row / (si->engine.depth >> 3)); while (count--) { /* find where the top and bottom are */ t_end = t_start = list[i].src_left + (offset * list[i].src_top) + si->engine.src_dst; t_end += list[i].width; b_end = b_start = list[i].src_left + (offset * (list[i].src_top + list[i].height)) + si->engine.src_dst; b_end += list[i].width; /* sgnzero bit _must_ be '0' before accessing SGN! */ ACCW(DWGCTL, 0x00000000); /*find which quadrant */ switch((list[i].dest_top > list[i].src_top) | ((list[i].dest_left > list[i].src_left) << 1)) { case 0: /*L->R,down*/ ACCW(SGN, 0); ACCW(AR3, t_start); ACCW(AR0, t_end); ACCW(AR5, offset); ACCW_YDSTLEN(list[i].dest_top, list[i].height + 1); break; case 1: /*L->R,up*/ ACCW(SGN, 4); ACCW(AR3, b_start); ACCW(AR0, b_end); ACCW(AR5, -offset); ACCW_YDSTLEN(list[i].dest_top + list[i].height, list[i].height + 1); break; case 2: /*R->L,down*/ ACCW(SGN, 1); ACCW(AR3, t_end); ACCW(AR0, t_start); ACCW(AR5, offset); ACCW_YDSTLEN(list[i].dest_top, list[i].height + 1); break; case 3: /*R->L,up*/ ACCW(SGN, 5); ACCW(AR3, b_end); ACCW(AR0, b_start); ACCW(AR5, -offset); ACCW_YDSTLEN(list[i].dest_top + list[i].height, list[i].height + 1); break; } ACCW(FXBNDRY,((list[i].dest_left + list[i].width) << 16) | list[i].dest_left); /* start the blit */ ACCGO(DWGCTL, 0x040c4018); // atype RSTR i++; } } /* screen to screen tranparent blit - not sure what uses this. * Engine function bitblit, paragraph 4.5.7.2 */ //WARNING: //yet untested function!! void SCREEN_TO_SCREEN_TRANSPARENT_BLIT(engine_token *et, uint32 transparent_colour, blit_params *list, uint32 count) { uint32 t_start,t_end,offset; uint32 b_start,b_end; int i = 0; /* calc offset 'per line' */ offset = (si->fbc.bytes_per_row / (si->engine.depth >> 3)); while (count--) { /* find where the top and bottom are */ t_end = t_start = list[i].src_left + (offset * list[i].src_top) + si->engine.src_dst; t_end += list[i].width; b_end = b_start = list[i].src_left + (offset * (list[i].src_top + list[i].height)) + si->engine.src_dst; b_end += list[i].width; /* sgnzero bit _must_ be '0' before accessing SGN! */ ACCW(DWGCTL, 0x00000000); /*find which quadrant */ switch((list[i].dest_top > list[i].src_top) | ((list[i].dest_left > list[i].src_left) << 1)) { case 0: /*L->R,down*/ ACCW(SGN, 0); ACCW(AR3, t_start); ACCW(AR0, t_end); ACCW(AR5, offset); ACCW_YDSTLEN(list[i].dest_top, list[i].height + 1); break; case 1: /*L->R,up*/ ACCW(SGN, 4); ACCW(AR3, b_start); ACCW(AR0, b_end); ACCW(AR5, -offset); ACCW_YDSTLEN(list[i].dest_top + list[i].height, list[i].height + 1); break; case 2: /*R->L,down*/ ACCW(SGN, 1); ACCW(AR3, t_end); ACCW(AR0, t_start); ACCW(AR5, offset); ACCW_YDSTLEN(list[i].dest_top, list[i].height + 1); break; case 3: /*R->L,up*/ ACCW(SGN, 5); ACCW(AR3, b_end); ACCW(AR0, b_start); ACCW(AR5, -offset); ACCW_YDSTLEN(list[i].dest_top + list[i].height, list[i].height + 1); break; } ACCW(FXBNDRY,((list[i].dest_left + list[i].width) << 16) | list[i].dest_left); /* start the blit */ ACCW(FCOL, transparent_colour); ACCW(BCOL, 0xffffffff); ACCGO(DWGCTL, 0x440c4018); // atype RSTR i++; } } /* screen to screen scaled filtered blit - i.e. scale video in memory. * Engine function texture mapping for video, paragraphs 4.5.5.5 - 4.5.5.9 */ //fixme: implement... void SCREEN_TO_SCREEN_SCALED_FILTERED_BLIT(engine_token *et, scaled_blit_params *list, uint32 count) { int i = 0; while (count--) { /* list[i].src_left, list[i].src_top, list[i].src_width, list[i].src_height, list[i].dest_left, list[i].dest_top, list[i].dest_width, list[i].dest_height */ i++; } } /* rectangle fill. * Engine function rectangle_fill: paragraph 4.5.5.2 */ void FILL_RECTANGLE(engine_token *et, uint32 colorIndex, fill_rect_params *list, uint32 count) { /* FXBNDRY - left and right coordinates a YDSTLEN - y start and no of lines a (or YDST and LEN) DWGCTL - atype must be RSTR or BLK a FCOL - foreground colour a */ int i = 0; while (count--) { ACCW(FXBNDRY, (((list[i].right + 1) << 16) | list[i].left)); ACCW_YDSTLEN(list[i].top, ((list[i].bottom - list[i].top) + 1)); ACCW(FCOL, colorIndex); /* start the fill */ //acc fixme: checkout blockmode constraints for G100+ (mil: nc?): also add blockmode // for other functions, and use fastblt on MIL1/2 if possible... //or is CMAP8 contraint a non-blockmode contraint? (linearisation problem maybe?) if ((si->dm.space == B_CMAP8) || si->ps.sdram) { ACCGO(DWGCTL, 0x400c7814); // atype RSTR } else { ACCGO(DWGCTL, 0x400c7844); // atype BLK } i++; } } /* horizontal span fill. * Engine function rectangle_fill: paragraph 4.5.5.2 */ //(uint32 xs,uint32 xe,uint32 ys,uint32 yl,uint32 col) void FILL_SPAN(engine_token *et, uint32 colorIndex, uint16 *list, uint32 count) { /* FXBNDRY - left and right coordinates a YDSTLEN - y start and no of lines a (or YDST and LEN) DWGCTL - atype must be RSTR or BLK a FCOL - foreground colour a */ int i = 0; while (count--) { ACCW(FXBNDRY, ((list[i + 2] + 1) << 16)| list[i + 1]); ACCW_YDSTLEN(list[i], 1); ACCW(FCOL, colorIndex); /* start the fill */ //acc fixme: checkout blockmode constraints for G100+ (mil: nc?): also add blockmode // for other functions, and use fastblt on MIL1/2 if possible... //or is CMAP8 contraint a non-blockmode contraint? (linearisation problem maybe?) if ((si->dm.space == B_CMAP8) || si->ps.sdram) { ACCGO(DWGCTL, 0x400c7814); // atype RSTR } else { ACCGO(DWGCTL, 0x400c7844); // atype BLK } i += 3; } } /* rectangle invert. * Engine function rectangle_fill: paragraph 4.5.5.2 */ void INVERT_RECTANGLE(engine_token *et, fill_rect_params *list, uint32 count) { /* FXBNDRY - left and right coordinates a YDSTLEN - y start and no of lines a (or YDST and LEN) DWGCTL - atype must be RSTR or BLK a FCOL - foreground colour a */ int i = 0; // uint32 * dma; // uint32 pci; while (count--) { ACCW(FXBNDRY, (((list[i].right) + 1) << 16) | list[i].left); ACCW_YDSTLEN(list[i].top, ((list[i].bottom - list[i].top) + 1)); ACCW(FCOL, 0); /* color */ /* start the invert (top nibble is c is clipping enabled) */ ACCGO(DWGCTL, 0x40057814); // atype RSTR /* pseudo_dma version! */ // MGAACC_DWGCTL =0x1c00, // MGAACC_FCOL =0x1c24, // MGAACC_FXBNDRY =0x1c84, // MGAACC_YDSTLEN =0x1c88, // // 40,09,21,22 (ordered as registers) // dma = (uint32 *)si->pseudo_dma; // *dma++= 0x40092221; // *dma++= (((list[i].right) + 1) << 16) | list[i].left; // *dma++= (list[i].top << 16) | ((list[i].bottom - list[i].top) + 1); // *dma++= 0; /* color */ // *dma++= 0x40057814; /* real dma version! */ // dma = (vuint32 *)si->dma_buffer; // *dma++= 0x40092221; /* indices */ // *dma++= (((list[i].right) + 1) << 16) | list[i].left; // *dma++= (list[i].top << 16) | ((list[i].bottom - list[i].top) + 1); // *dma++= 0; /* color */ // *dma++= 0x40057814; // pci = si->dma_buffer_pci; // ACCW(PRIMADDRESS, (pci)); // ACCW(PRIMEND, (20 + pci)); // delay(100); i++; } }