/* NV Acceleration functions */ /* Author: Rudolf Cornelissen 8/2003-6/2010. This code was possible thanks to: - the Linux XFree86 NV driver, - the Linux UtahGLX 3D driver. */ #define MODULE_BIT 0x00080000 #include "nv_std.h" /*acceleration notes*/ /*functions Be's app_server uses: fill span (horizontal only) fill rectangle (these 2 are very similar) invert rectangle blit */ static void nv_init_for_3D_dma(void); static void nv_start_dma(void); static status_t nv_acc_fifofree_dma(uint16 cmd_size); static void nv_acc_cmd_dma(uint32 cmd, uint16 offset, uint16 size); static void nv_acc_set_ch_dma(uint16 ch, uint32 handle); /* used to track engine DMA stalls */ static uint8 err; /* wait until engine completely idle */ status_t nv_acc_wait_idle_dma() { /* we'd better check for timeouts on the DMA engine as it's theoretically * breakable by malfunctioning software */ uint16 cnt = 0; /* wait until all upcoming commands are in execution at least. Do this until * we hit a timeout; abort if we failed at least three times before: * if DMA stalls, we have to forget about it alltogether at some point, or * the system will almost come to a complete halt.. */ /* note: * it doesn't matter which FIFO channel's DMA registers we access, they are in * fact all the same set. It also doesn't matter if the channel was assigned a * command or not. */ while ((NV_REG32(NVACC_FIFO + NV_GENERAL_DMAGET) != (si->engine.dma.put << 2)) && (cnt < 10000) && (err < 3)) { /* snooze a bit so I do not hammer the bus */ snooze (100); cnt++; } /* log timeout if we had one */ if (cnt == 10000) { if (err < 3) err++; LOG(4,("ACC_DMA: wait_idle; DMA timeout #%d, engine trouble!\n", err)); } /* wait until execution completed */ while (ACCR(STATUS)) { /* snooze a bit so I do not hammer the bus */ snooze (100); } return B_OK; } /* AFAIK this must be done for every new screenmode. * Engine required init. 
*/ status_t nv_acc_init_dma() { uint32 cnt, tmp; uint32 surf_depth, cmd_depth; /* reset the engine DMA stalls counter */ err = 0; /* a hanging engine only recovers from a complete power-down/power-up cycle */ NV_REG32(NV32_PWRUPCTRL) = 0xffff00ff; snooze(1000); NV_REG32(NV32_PWRUPCTRL) = 0xffffffff; /* don't try this on NV20 and later.. */ /* note: * the specific register that's responsible for the speedfix on NV18 is * $00400ed8: bit 6 needs to be zero for fastest rendering (confirmed). */ /* note also: * on NV28 the following ranges could be reset (confirmed): * $00400000 upto/incl. $004002fc; * $00400400 upto/incl. $004017fc; * $0040180c upto/incl. $00401948; * $00401994 upto/incl. $00401a80; * $00401a94 upto/incl. $00401ffc. * The intermediate ranges hang the engine upon resetting. */ if (si->ps.card_arch < NV20A) { /* actively reset the PGRAPH registerset (acceleration engine) */ for (cnt = 0x00400000; cnt < 0x00402000; cnt +=4) { NV_REG32(cnt) = 0x00000000; } } /* setup PTIMER: */ LOG(4,("ACC_DMA: timer numerator $%08x, denominator $%08x\n", ACCR(PT_NUMERATOR), ACCR(PT_DENOMINATR))); /* The NV28 BIOS programs PTIMER like this (see coldstarting in nv_info.c) */ //ACCW(PT_NUMERATOR, (si->ps.std_engine_clock * 20)); //ACCW(PT_DENOMINATR, 0x00000271); /* Nouveau (march 2009) mentions something like: writing 8 and 3 to these regs breaks the timings * on the LVDS hardware sequencing microcode. A correct solution involves calculations with the GPU PLL. */ /* For now use BIOS pre-programmed values if there */ if (!ACCR(PT_NUMERATOR) || !ACCR(PT_DENOMINATR)) { /* set timer numerator to 8 (in b0-15) */ ACCW(PT_NUMERATOR, 0x00000008); /* set timer denominator to 3 (in b0-15) */ ACCW(PT_DENOMINATR, 0x00000003); } /* disable timer-alarm INT requests (b0) */ ACCW(PT_INTEN, 0x00000000); /* reset timer-alarm INT status bit (b0) */ ACCW(PT_INTSTAT, 0xffffffff); /* enable PRAMIN write access on pre NV10 before programming it! 
*/ if (si->ps.card_arch == NV04A) { /* set framebuffer config: type = notiling, PRAMIN write access enabled */ NV_REG32(NV32_PFB_CONFIG_0) = 0x00001114; } else { /* setup acc engine 'source' tile adressranges */ if ((si->ps.card_type <= NV40) || (si->ps.card_type == NV45)) { ACCW(NV10_FBTIL0AD, 0); ACCW(NV10_FBTIL1AD, 0); ACCW(NV10_FBTIL2AD, 0); ACCW(NV10_FBTIL3AD, 0); ACCW(NV10_FBTIL4AD, 0); ACCW(NV10_FBTIL5AD, 0); ACCW(NV10_FBTIL6AD, 0); ACCW(NV10_FBTIL7AD, 0); ACCW(NV10_FBTIL0ED, (si->ps.memory_size - 1)); ACCW(NV10_FBTIL1ED, (si->ps.memory_size - 1)); ACCW(NV10_FBTIL2ED, (si->ps.memory_size - 1)); ACCW(NV10_FBTIL3ED, (si->ps.memory_size - 1)); ACCW(NV10_FBTIL4ED, (si->ps.memory_size - 1)); ACCW(NV10_FBTIL5ED, (si->ps.memory_size - 1)); ACCW(NV10_FBTIL6ED, (si->ps.memory_size - 1)); ACCW(NV10_FBTIL7ED, (si->ps.memory_size - 1)); } else { /* NV41, 43, 44, G70 and up */ ACCW(NV41_FBTIL0AD, 0); ACCW(NV41_FBTIL1AD, 0); ACCW(NV41_FBTIL2AD, 0); ACCW(NV41_FBTIL3AD, 0); ACCW(NV41_FBTIL4AD, 0); ACCW(NV41_FBTIL5AD, 0); ACCW(NV41_FBTIL6AD, 0); ACCW(NV41_FBTIL7AD, 0); ACCW(NV41_FBTIL8AD, 0); ACCW(NV41_FBTIL9AD, 0); ACCW(NV41_FBTILAAD, 0); ACCW(NV41_FBTILBAD, 0); ACCW(NV41_FBTIL0ED, (si->ps.memory_size - 1)); ACCW(NV41_FBTIL1ED, (si->ps.memory_size - 1)); ACCW(NV41_FBTIL2ED, (si->ps.memory_size - 1)); ACCW(NV41_FBTIL3ED, (si->ps.memory_size - 1)); ACCW(NV41_FBTIL4ED, (si->ps.memory_size - 1)); ACCW(NV41_FBTIL5ED, (si->ps.memory_size - 1)); ACCW(NV41_FBTIL6ED, (si->ps.memory_size - 1)); ACCW(NV41_FBTIL7ED, (si->ps.memory_size - 1)); ACCW(NV41_FBTIL8ED, (si->ps.memory_size - 1)); ACCW(NV41_FBTIL9ED, (si->ps.memory_size - 1)); ACCW(NV41_FBTILAED, (si->ps.memory_size - 1)); ACCW(NV41_FBTILBED, (si->ps.memory_size - 1)); if (si->ps.card_type >= G70) { ACCW(G70_FBTILCAD, 0); ACCW(G70_FBTILDAD, 0); ACCW(G70_FBTILEAD, 0); ACCW(G70_FBTILCED, (si->ps.memory_size - 1)); ACCW(G70_FBTILDED, (si->ps.memory_size - 1)); ACCW(G70_FBTILEED, (si->ps.memory_size - 1)); } } } /*** PRAMIN ***/ /* 
first clear the entire RAMHT (hash-table) space to a defined state. It turns * out at least NV11 will keep the previously programmed handles over resets and * power-outages upto about 15 seconds!! Faulty entries might well hang the * engine (confirmed on NV11). * Note: * this behaviour is not very strange: even very old DRAM chips are known to be * able to do this, even though you should refresh them every few milliseconds or * so. (Large memory cell capacitors, though different cells vary a lot in their * capacity.) * Of course data validity is not certain by a long shot over this large * amount of time.. */ for(cnt = 0; cnt < 0x0400; cnt++) NV_REG32(NVACC_HT_HANDL_00 + (cnt << 2)) = 0; /* RAMHT (hash-table) space SETUP FIFO HANDLES */ /* note: * 'instance' tells you where the engine command is stored in 'PR_CTXx_x' sets * below: instance being b4-19 with baseadress NV_PRAMIN_CTX_0 (0x00700000). * That command is linked to the handle noted here. This handle is then used to * tell the FIFO to which engine command it is connected! * (CTX registers are actually a sort of RAM space.) 
*/ if (si->ps.card_arch >= NV40A) { /* (first set) */ ACCW(HT_HANDL_00, (0x80000000 | NV10_CONTEXT_SURFACES_2D)); /* 32bit handle (not used) */ ACCW(HT_VALUE_00, 0x0010114c); /* instance $114c, engine = acc engine, CHID = $00 */ ACCW(HT_HANDL_01, (0x80000000 | NV_IMAGE_BLIT)); /* 32bit handle */ ACCW(HT_VALUE_01, 0x00101148); /* instance $1148, engine = acc engine, CHID = $00 */ ACCW(HT_HANDL_02, (0x80000000 | NV4_GDI_RECTANGLE_TEXT)); /* 32bit handle */ ACCW(HT_VALUE_02, 0x0010114a); /* instance $114a, engine = acc engine, CHID = $00 */ /* (second set) */ ACCW(HT_HANDL_10, (0x80000000 | NV_ROP5_SOLID)); /* 32bit handle */ ACCW(HT_VALUE_10, 0x00101142); /* instance $1142, engine = acc engine, CHID = $00 */ ACCW(HT_HANDL_11, (0x80000000 | NV_IMAGE_BLACK_RECTANGLE)); /* 32bit handle */ ACCW(HT_VALUE_11, 0x00101144); /* instance $1144, engine = acc engine, CHID = $00 */ ACCW(HT_HANDL_12, (0x80000000 | NV_IMAGE_PATTERN)); /* 32bit handle */ ACCW(HT_VALUE_12, 0x00101146); /* instance $1146, engine = acc engine, CHID = $00 */ ACCW(HT_HANDL_13, (0x80000000 | NV_SCALED_IMAGE_FROM_MEMORY)); /* 32bit handle */ ACCW(HT_VALUE_13, 0x0010114e); /* instance $114e, engine = acc engine, CHID = $00 */ } else { /* (first set) */ ACCW(HT_HANDL_00, (0x80000000 | NV4_SURFACE)); /* 32bit handle */ ACCW(HT_VALUE_00, 0x80011145); /* instance $1145, engine = acc engine, CHID = $00 */ ACCW(HT_HANDL_01, (0x80000000 | NV_IMAGE_BLIT)); /* 32bit handle */ ACCW(HT_VALUE_01, 0x80011146); /* instance $1146, engine = acc engine, CHID = $00 */ ACCW(HT_HANDL_02, (0x80000000 | NV4_GDI_RECTANGLE_TEXT)); /* 32bit handle */ ACCW(HT_VALUE_02, 0x80011147); /* instance $1147, engine = acc engine, CHID = $00 */ ACCW(HT_HANDL_03, (0x80000000 | NV4_CONTEXT_SURFACES_ARGB_ZS)); /* 32bit handle (3D) */ ACCW(HT_VALUE_03, 0x80011148); /* instance $1148, engine = acc engine, CHID = $00 */ /* NV4_ and NV10_DX5_TEXTURE_TRIANGLE should be identical */ ACCW(HT_HANDL_04, (0x80000000 | NV4_DX5_TEXTURE_TRIANGLE)); /* 32bit 
handle (3D) */ ACCW(HT_VALUE_04, 0x80011149); /* instance $1149, engine = acc engine, CHID = $00 */ /* NV4_ and NV10_DX6_MULTI_TEXTURE_TRIANGLE should be identical */ ACCW(HT_HANDL_05, (0x80000000 | NV4_DX6_MULTI_TEXTURE_TRIANGLE)); /* 32bit handle (not used) */ ACCW(HT_VALUE_05, 0x8001114a); /* instance $114a, engine = acc engine, CHID = $00 */ ACCW(HT_HANDL_06, (0x80000000 | NV1_RENDER_SOLID_LIN)); /* 32bit handle (not used) */ ACCW(HT_VALUE_06, 0x8001114c); /* instance $114c, engine = acc engine, CHID = $00 */ /* (second set) */ ACCW(HT_HANDL_10, (0x80000000 | NV_ROP5_SOLID)); /* 32bit handle */ ACCW(HT_VALUE_10, 0x80011142); /* instance $1142, engine = acc engine, CHID = $00 */ ACCW(HT_HANDL_11, (0x80000000 | NV_IMAGE_BLACK_RECTANGLE)); /* 32bit handle */ ACCW(HT_VALUE_11, 0x80011143); /* instance $1143, engine = acc engine, CHID = $00 */ ACCW(HT_HANDL_12, (0x80000000 | NV_IMAGE_PATTERN)); /* 32bit handle */ ACCW(HT_VALUE_12, 0x80011144); /* instance $1144, engine = acc engine, CHID = $00 */ ACCW(HT_HANDL_13, (0x80000000 | NV_SCALED_IMAGE_FROM_MEMORY)); /* 32bit handle */ ACCW(HT_VALUE_13, 0x8001114b); /* instance $114b, engine = acc engine, CHID = $00 */ //2007 3D tests.. if (si->ps.card_type == NV15) { ACCW(HT_HANDL_14, (0x80000000 | NV_TCL_PRIMITIVE_3D)); /* 32bit handle */ ACCW(HT_VALUE_14, 0x8001114d); /* instance $114d, engine = acc engine, CHID = $00 */ } } /* program CTX registers: CTX1 is mostly done later (colorspace dependant) */ /* note: * CTX determines which HT handles point to what engine commands. */ /* note also: * CTX registers are in fact in the same GPU internal RAM space as the engine's * hashtable. This means that stuff programmed in here also survives resets and * power-outages! (confirmed NV11) */ if (si->ps.card_arch >= NV40A) { /* setup a DMA define for use by command defines below. */ ACCW(PR_CTX0_R, 0x00003000); /* DMA page table present and of linear type; * DMA target node is NVM (non-volatile memory?) 
* (instead of doing PCI or AGP transfers) */ ACCW(PR_CTX1_R, (si->ps.memory_size - 1)); /* DMA limit: size is all cardRAM */ ACCW(PR_CTX2_R, ((0x00000000 & 0xfffff000) | 0x00000002)); /* DMA access type is READ_AND_WRITE; * memory starts at start of cardRAM (b12-31): * It's adress needs to be at a 4kb boundary! */ ACCW(PR_CTX3_R, 0x00000002); /* unknown (looks like this is rubbish/not needed?) */ /* setup set '0' for cmd NV_ROP5_SOLID */ ACCW(PR_CTX0_0, 0x02080043); /* NVclass $043, patchcfg ROP_AND, nv10+: little endian */ ACCW(PR_CTX1_0, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ ACCW(PR_CTX2_0, 0x00000000); /* DMA0 and DMA1 instance invalid */ ACCW(PR_CTX3_0, 0x00000000); /* method traps disabled */ ACCW(PR_CTX0_1, 0x00000000); /* extra */ ACCW(PR_CTX1_1, 0x00000000); /* extra */ /* setup set '1' for cmd NV_IMAGE_BLACK_RECTANGLE */ ACCW(PR_CTX0_2, 0x02080019); /* NVclass $019, patchcfg ROP_AND, nv10+: little endian */ ACCW(PR_CTX1_2, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ ACCW(PR_CTX2_2, 0x00000000); /* DMA0 and DMA1 instance invalid */ ACCW(PR_CTX3_2, 0x00000000); /* method traps disabled */ ACCW(PR_CTX0_3, 0x00000000); /* extra */ ACCW(PR_CTX1_3, 0x00000000); /* extra */ /* setup set '2' for cmd NV_IMAGE_PATTERN */ ACCW(PR_CTX0_4, 0x02080018); /* NVclass $018, patchcfg ROP_AND, nv10+: little endian */ ACCW(PR_CTX1_4, 0x02000000); /* colorspace not set, notify instance is $0200 (b16-31) */ ACCW(PR_CTX2_4, 0x00000000); /* DMA0 and DMA1 instance invalid */ ACCW(PR_CTX3_4, 0x00000000); /* method traps disabled */ ACCW(PR_CTX0_5, 0x00000000); /* extra */ ACCW(PR_CTX1_5, 0x00000000); /* extra */ /* setup set '4' for cmd NV12_IMAGE_BLIT */ ACCW(PR_CTX0_6, 0x0208009f); /* NVclass $09f, patchcfg ROP_AND, nv10+: little endian */ ACCW(PR_CTX1_6, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ ACCW(PR_CTX2_6, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */ 
ACCW(PR_CTX3_6, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */ ACCW(PR_CTX0_7, 0x00000000); /* extra */ ACCW(PR_CTX1_7, 0x00000000); /* extra */ /* setup set '5' for cmd NV4_GDI_RECTANGLE_TEXT */ ACCW(PR_CTX0_8, 0x0208004a); /* NVclass $04a, patchcfg ROP_AND, nv10+: little endian */ ACCW(PR_CTX1_8, 0x02000000); /* colorspace not set, notify instance is $0200 (b16-31) */ ACCW(PR_CTX2_8, 0x00000000); /* DMA0 and DMA1 instance invalid */ ACCW(PR_CTX3_8, 0x00000000); /* method traps disabled */ ACCW(PR_CTX0_9, 0x00000000); /* extra */ ACCW(PR_CTX1_9, 0x00000000); /* extra */ /* setup set '6' for cmd NV10_CONTEXT_SURFACES_2D */ ACCW(PR_CTX0_A, 0x02080062); /* NVclass $062, nv10+: little endian */ ACCW(PR_CTX1_A, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ ACCW(PR_CTX2_A, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */ ACCW(PR_CTX3_A, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */ ACCW(PR_CTX0_B, 0x00000000); /* extra */ ACCW(PR_CTX1_B, 0x00000000); /* extra */ /* setup set '7' for cmd NV_SCALED_IMAGE_FROM_MEMORY */ ACCW(PR_CTX0_C, 0x02080077); /* NVclass $077, nv10+: little endian */ ACCW(PR_CTX1_C, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ ACCW(PR_CTX2_C, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */ ACCW(PR_CTX3_C, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */ ACCW(PR_CTX0_D, 0x00000000); /* extra */ ACCW(PR_CTX1_D, 0x00000000); /* extra */ /* setup DMA set pointed at by PF_CACH1_DMAI */ ACCW(PR_CTX0_E, 0x00003002); /* DMA page table present and of linear type; * DMA class is $002 (b0-11); * DMA target node is NVM (non-volatile memory?) 
* (instead of doing PCI or AGP transfers) */ ACCW(PR_CTX1_E, 0x00007fff); /* DMA limit: tablesize is 32k bytes */ ACCW(PR_CTX2_E, (((si->ps.memory_size - 1) & 0xffff8000) | 0x00000002)); /* DMA access type is READ_AND_WRITE; * table is located at end of cardRAM (b12-31): * It's adress needs to be at a 4kb boundary! */ } else { /* setup a DMA define for use by command defines below. */ ACCW(PR_CTX0_R, 0x00003000); /* DMA page table present and of linear type; * DMA target node is NVM (non-volatile memory?) * (instead of doing PCI or AGP transfers) */ ACCW(PR_CTX1_R, (si->ps.memory_size - 1)); /* DMA limit: size is all cardRAM */ ACCW(PR_CTX2_R, ((0x00000000 & 0xfffff000) | 0x00000002)); /* DMA access type is READ_AND_WRITE; * memory starts at start of cardRAM (b12-31): * It's adress needs to be at a 4kb boundary! */ ACCW(PR_CTX3_R, 0x00000002); /* unknown (looks like this is rubbish/not needed?) */ /* setup set '0' for cmd NV_ROP5_SOLID */ ACCW(PR_CTX0_0, 0x01008043); /* NVclass $043, patchcfg ROP_AND, nv10+: little endian */ ACCW(PR_CTX1_0, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ ACCW(PR_CTX2_0, 0x00000000); /* DMA0 and DMA1 instance invalid */ ACCW(PR_CTX3_0, 0x00000000); /* method traps disabled */ /* setup set '1' for cmd NV_IMAGE_BLACK_RECTANGLE */ ACCW(PR_CTX0_1, 0x01008019); /* NVclass $019, patchcfg ROP_AND, nv10+: little endian */ ACCW(PR_CTX1_1, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ ACCW(PR_CTX2_1, 0x00000000); /* DMA0 and DMA1 instance invalid */ ACCW(PR_CTX3_1, 0x00000000); /* method traps disabled */ /* setup set '2' for cmd NV_IMAGE_PATTERN */ ACCW(PR_CTX0_2, 0x01008018); /* NVclass $018, patchcfg ROP_AND, nv10+: little endian */ ACCW(PR_CTX1_2, 0x00000002); /* colorspace not set, notify instance is $0200 (b16-31) */ ACCW(PR_CTX2_2, 0x00000000); /* DMA0 and DMA1 instance invalid */ ACCW(PR_CTX3_2, 0x00000000); /* method traps disabled */ /* setup set '3' for ... 
*/ if(si->ps.card_arch >= NV10A) { /* ... cmd NV10_CONTEXT_SURFACES_2D */ ACCW(PR_CTX0_3, 0x01008062); /* NVclass $062, nv10+: little endian */ } else { /* ... cmd NV4_SURFACE */ ACCW(PR_CTX0_3, 0x01008042); /* NVclass $042, nv10+: little endian */ } ACCW(PR_CTX1_3, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ ACCW(PR_CTX2_3, 0x11401140); /* DMA0 instance is $1140, DMA1 instance invalid */ ACCW(PR_CTX3_3, 0x00000000); /* method trap 0 is $1140, trap 1 disabled */ /* setup set '4' for ... */ if (si->ps.card_type >= NV11) { /* ... cmd NV12_IMAGE_BLIT */ ACCW(PR_CTX0_4, 0x0100809f); /* NVclass $09f, patchcfg ROP_AND, nv10+: little endian */ } else { /* ... cmd NV_IMAGE_BLIT */ ACCW(PR_CTX0_4, 0x0100805f); /* NVclass $05f, patchcfg ROP_AND, nv10+: little endian */ } ACCW(PR_CTX1_4, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ ACCW(PR_CTX2_4, 0x11401140); /* DMA0 instance is $1140, DMA1 instance invalid */ ACCW(PR_CTX3_4, 0x00000000); /* method trap 0 is $1140, trap 1 disabled */ /* setup set '5' for cmd NV4_GDI_RECTANGLE_TEXT */ ACCW(PR_CTX0_5, 0x0100804a); /* NVclass $04a, patchcfg ROP_AND, nv10+: little endian */ ACCW(PR_CTX1_5, 0x00000002); /* colorspace not set, notify instance is $0200 (b16-31) */ ACCW(PR_CTX2_5, 0x00000000); /* DMA0 and DMA1 instance invalid */ ACCW(PR_CTX3_5, 0x00000000); /* method traps disabled */ /* setup set '6' ... */ if (si->ps.card_arch >= NV10A) { /* ... for cmd NV10_CONTEXT_SURFACES_ARGB_ZS */ ACCW(PR_CTX0_6, 0x00000093); /* NVclass $093, nv10+: little endian */ } else { /* ... for cmd NV4_CONTEXT_SURFACES_ARGB_ZS */ ACCW(PR_CTX0_6, 0x00000053); /* NVclass $053, nv10+: little endian */ } ACCW(PR_CTX1_6, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ ACCW(PR_CTX2_6, 0x11401140); /* DMA0, DMA1 instance = $1140 */ ACCW(PR_CTX3_6, 0x00000000); /* method traps disabled */ /* setup set '7' ... */ if (si->ps.card_arch >= NV10A) { /* ... 
for cmd NV10_DX5_TEXTURE_TRIANGLE */ ACCW(PR_CTX0_7, 0x0300a094); /* NVclass $094, patchcfg ROP_AND, userclip enable, * context surface0 valid, nv10+: little endian */ } else { /* ... for cmd NV4_DX5_TEXTURE_TRIANGLE */ ACCW(PR_CTX0_7, 0x0300a054); /* NVclass $054, patchcfg ROP_AND, userclip enable, * context surface0 valid */ } ACCW(PR_CTX1_7, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ ACCW(PR_CTX2_7, 0x11401140); /* DMA0, DMA1 instance = $1140 */ ACCW(PR_CTX3_7, 0x00000000); /* method traps disabled */ /* setup set '8' ... */ if (si->ps.card_arch >= NV10A) { /* ... for cmd NV10_DX6_MULTI_TEXTURE_TRIANGLE (not used) */ ACCW(PR_CTX0_8, 0x0300a095); /* NVclass $095, patchcfg ROP_AND, userclip enable, * context surface0 valid, nv10+: little endian */ } else { /* ... for cmd NV4_DX6_MULTI_TEXTURE_TRIANGLE (not used) */ ACCW(PR_CTX0_8, 0x0300a055); /* NVclass $055, patchcfg ROP_AND, userclip enable, * context surface0 valid */ } ACCW(PR_CTX1_8, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ ACCW(PR_CTX2_8, 0x11401140); /* DMA0, DMA1 instance = $1140 */ ACCW(PR_CTX3_8, 0x00000000); /* method traps disabled */ /* setup set '9' for cmd NV_SCALED_IMAGE_FROM_MEMORY */ ACCW(PR_CTX0_9, 0x01018077); /* NVclass $077, patchcfg SRC_COPY, * context surface0 valid, nv10+: little endian */ ACCW(PR_CTX1_9, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ ACCW(PR_CTX2_9, 0x11401140); /* DMA0, DMA1 instance = $1140 */ ACCW(PR_CTX3_9, 0x00000000); /* method traps disabled */ /* setup set 'A' for cmd NV1_RENDER_SOLID_LIN (not used) */ ACCW(PR_CTX0_A, 0x0300a01c); /* NVclass $01c, patchcfg ROP_AND, userclip enable, * context surface0 valid, nv10+: little endian */ ACCW(PR_CTX1_A, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ ACCW(PR_CTX2_A, 0x11401140); /* DMA0, DMA1 instance = $1140 */ ACCW(PR_CTX3_A, 0x00000000); /* method traps disabled */ //2007 3D tests.. /* setup set 'B' ... 
*/ if (si->ps.card_type == NV15) { /* ... for cmd NV11_TCL_PRIMITIVE_3D */ ACCW(PR_CTX0_B, 0x0300a096); /* NVclass $096, patchcfg ROP_AND, userclip enable, * context surface0 valid, nv10+: little endian */ ACCW(PR_CTX1_B, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ ACCW(PR_CTX2_B, 0x11401140); /* DMA0, DMA1 instance = $1140 */ ACCW(PR_CTX3_B, 0x00000000); /* method traps disabled */ } /* setup DMA set pointed at by PF_CACH1_DMAI */ if (si->engine.agp_mode) { /* DMA page table present and of linear type; * DMA class is $002 (b0-11); * DMA target node is AGP */ ACCW(PR_CTX0_C, 0x00033002); } else { /* DMA page table present and of linear type; * DMA class is $002 (b0-11); * DMA target node is PCI */ ACCW(PR_CTX0_C, 0x00023002); } ACCW(PR_CTX1_C, 0x000fffff); /* DMA limit: tablesize is 1M bytes */ ACCW(PR_CTX2_C, (((uintptr_t)((uint8 *)(si->dma_buffer_pci))) | 0x00000002)); /* DMA access type is READ_AND_WRITE; * table is located in main system RAM (b12-31): * It's adress needs to be at a 4kb boundary! */ /* set the 3D rendering functions colordepth via BPIXEL's 'depth 2' */ /* note: * setting a depth to 'invalid' (zero) makes the engine report * ready with drawing 'immediately'. */ //fixme: NV30A and above (probably) needs to be corrected... 
switch(si->dm.space) { case B_CMAP8: if (si->ps.card_arch < NV30A) /* set depth 2: $1 = Y8 */ ACCW(BPIXEL, 0x00000100); else /* set depth 0-1: $1 = Y8, $2 = X1R5G5B5_Z1R5G5B5 */ ACCW(BPIXEL, 0x00000021); break; case B_RGB15_LITTLE: if (si->ps.card_arch < NV30A) /* set depth 2: $4 = A1R5G5B5 */ ACCW(BPIXEL, 0x00000400); else /* set depth 0-1: $2 = X1R5G5B5_Z1R5G5B5, $4 = A1R5G5B5 */ ACCW(BPIXEL, 0x00000042); break; case B_RGB16_LITTLE: if (si->ps.card_arch < NV30A) /* set depth 2: $5 = R5G6B5 */ ACCW(BPIXEL, 0x00000500); else /* set depth 0-1: $5 = R5G6B5, $a = X1A7R8G8B8_O1A7R8G8B8 */ ACCW(BPIXEL, 0x000000a5); break; case B_RGB32_LITTLE: case B_RGBA32_LITTLE: if (si->ps.card_arch < NV30A) /* set depth 2: $c = A8R8G8B8 */ ACCW(BPIXEL, 0x00000c00); else /* set depth 0-1: $7 = X8R8G8B8_Z8R8G8B8, $e = V8YB8U8YA8 */ ACCW(BPIXEL, 0x000000e7); break; default: LOG(8,("ACC: init, invalid bit depth\n")); return B_ERROR; } } if (si->ps.card_arch == NV04A) { /* do a explicit engine reset */ ACCW(DEBUG0, 0x000001ff); /* init some function blocks */ /* DEBUG0, b20 and b21 should be high, this has a big influence on * 3D rendering speed! 
(on all cards, confirmed) */ ACCW(DEBUG0, 0x1230c000); /* DEBUG1, b19 = 1 increases 3D rendering speed on TNT2 (M64) a bit, * TNT1 rendering speed stays the same (all cards confirmed) */ ACCW(DEBUG1, 0x72191101); ACCW(DEBUG2, 0x11d5f071); ACCW(DEBUG3, 0x0004ff31); /* init OP methods */ ACCW(DEBUG3, 0x4004ff31); /* disable all acceleration engine INT reguests */ ACCW(ACC_INTE, 0x00000000); /* reset all acceration engine INT status bits */ ACCW(ACC_INTS, 0xffffffff); /* context control enabled */ ACCW(NV04_CTX_CTRL, 0x10010100); /* all acceleration buffers, pitches and colors are valid */ ACCW(NV04_ACC_STAT, 0xffffffff); /* enable acceleration engine command FIFO */ ACCW(FIFO_EN, 0x00000001); /* setup location of active screen in framebuffer */ ACCW(OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); ACCW(OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); /* setup accesible card memory range */ ACCW(BLIMIT0, (si->ps.memory_size - 1)); ACCW(BLIMIT1, (si->ps.memory_size - 1)); /* pattern shape value = 8x8, 2 color */ //fixme: not needed, unless the engine has a hardware fault (setting via cmd)! //ACCW(PAT_SHP, 0x00000000); /* Pgraph Beta AND value (fraction) b23-30 */ ACCW(BETA_AND_VAL, 0xffffffff); } else { /* do a explicit engine reset */ ACCW(DEBUG0, 0xffffffff); ACCW(DEBUG0, 0x00000000); /* disable all acceleration engine INT reguests */ ACCW(ACC_INTE, 0x00000000); /* reset all acceration engine INT status bits */ ACCW(ACC_INTS, 0xffffffff); /* context control enabled */ ACCW(NV10_CTX_CTRL, 0x10010100); /* all acceleration buffers, pitches and colors are valid */ ACCW(NV10_ACC_STAT, 0xffffffff); /* enable acceleration engine command FIFO */ ACCW(FIFO_EN, 0x00000001); /* setup surface type: * b1-0 = %01 = surface type is non-swizzle; * this is needed to enable 3D on NV1x (confirmed) and maybe others? 
*/ ACCW(NV10_SURF_TYP, ((ACCR(NV10_SURF_TYP)) & 0x0007ff00)); ACCW(NV10_SURF_TYP, ((ACCR(NV10_SURF_TYP)) | 0x00020101)); } if (si->ps.card_arch == NV10A) { /* init some function blocks */ ACCW(DEBUG1, 0x00118700); /* DEBUG2 has a big influence on 3D speed for NV11 and NV15 * (confirmed b3 and b18 should both be '1' on both cards!) * (b16 should also be '1', increases 3D speed on NV11 a bit more) */ ACCW(DEBUG2, 0x24fd2ad9); ACCW(DEBUG3, 0x55de0030); /* NV10_DEBUG4 has a big influence on 3D speed for NV11, NV15 and NV18 * (confirmed b14 and b15 should both be '1' on these cards!) * (confirmed b8 should be '0' on NV18 to prevent complete engine crash!) */ ACCW(NV10_DEBUG4, 0x0000c000); /* copy tile setup stuff from 'source' to acc engine */ for (cnt = 0; cnt < 32; cnt++) { NV_REG32(NVACC_NV10_TIL0AD + (cnt << 2)) = NV_REG32(NVACC_NV10_FBTIL0AD + (cnt << 2)); } /* setup location of active screen in framebuffer */ ACCW(OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); ACCW(OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); /* setup accesible card memory range */ ACCW(BLIMIT0, (si->ps.memory_size - 1)); ACCW(BLIMIT1, (si->ps.memory_size - 1)); /* pattern shape value = 8x8, 2 color */ //fixme: not needed, unless the engine has a hardware fault (setting via cmd)! //ACCW(PAT_SHP, 0x00000000); /* Pgraph Beta AND value (fraction) b23-30 */ ACCW(BETA_AND_VAL, 0xffffffff); } if (si->ps.card_arch >= NV20A) { switch (si->ps.card_arch) { case NV40A: /* init some function blocks */ ACCW(DEBUG1, 0x401287c0); ACCW(DEBUG3, 0x60de8051); /* disable specific functions, but enable SETUP_SPARE2 register */ ACCW(NV10_DEBUG4, 0x00008000); /* set limit_viol_pix_adress(?): more likely something unknown.. */ ACCW(NV25_WHAT0, 0x00be3c5f); /* setup some unknown serially accessed registers (?) */ tmp = (NV_REG32(NV32_NV4X_WHAT0) & 0x000000ff); for (cnt = 0; (tmp && !(tmp & 0x00000001)); tmp >>= 1, cnt++); { ACCW(NV4X_WHAT2, cnt); } /* unknown.. 
*/ switch (si->ps.card_type) { case NV40: case NV45: /* and NV48: but these are pgm'd as NV45 currently */ ACCW(NV40_WHAT0, 0x83280fff); ACCW(NV40_WHAT1, 0x000000a0); ACCW(NV40_WHAT2, 0x0078e366); ACCW(NV40_WHAT3, 0x0000014c); break; case NV41: /* and ID == 0x012x: but no cards defined yet */ ACCW(NV40P_WHAT0, 0x83280eff); ACCW(NV40P_WHAT1, 0x000000a0); ACCW(NV40P_WHAT2, 0x007596ff); ACCW(NV40P_WHAT3, 0x00000108); break; case NV43: ACCW(NV40P_WHAT0, 0x83280eff); ACCW(NV40P_WHAT1, 0x000000a0); ACCW(NV40P_WHAT2, 0x0072cb77); ACCW(NV40P_WHAT3, 0x00000108); break; case NV44: case G72: ACCW(NV40P_WHAT0, 0x83280eff); ACCW(NV40P_WHAT1, 0x000000a0); NV_REG32(NV32_NV44_WHAT10) = NV_REG32(NV32_NV10STRAPINFO); NV_REG32(NV32_NV44_WHAT11) = 0x00000000; NV_REG32(NV32_NV44_WHAT12) = 0x00000000; NV_REG32(NV32_NV44_WHAT13) = NV_REG32(NV32_NV10STRAPINFO); ACCW(NV44_WHAT2, 0x00000000); ACCW(NV44_WHAT3, 0x00000000); break; /* case NV44 type 2: (cardID 0x022x) //fixme if needed: doesn't seem to need the strapinfo thing.. ACCW(NV40P_WHAT0, 0x83280eff); ACCW(NV40P_WHAT1, 0x000000a0); ACCW(NV44_WHAT2, 0x00000000); ACCW(NV44_WHAT3, 0x00000000); break; */ case G70: case G71: case G73: ACCW(NV40P_WHAT0, 0x83280eff); ACCW(NV40P_WHAT1, 0x000000a0); ACCW(NV40P_WHAT2, 0x07830610); ACCW(NV40P_WHAT3, 0x0000016a); break; default: ACCW(NV40P_WHAT0, 0x83280eff); ACCW(NV40P_WHAT1, 0x000000a0); break; } ACCW(NV10_TIL3PT, 0x2ffff800); ACCW(NV10_TIL3ST, 0x00006000); ACCW(NV4X_WHAT1, 0x01000000); /* engine data source DMA instance = $1140 */ ACCW(NV4X_DMA_SRC, 0x00001140); break; case NV30A: /* init some function blocks, but most is unknown.. */ ACCW(DEBUG1, 0x40108700); ACCW(NV25_WHAT1, 0x00140000); ACCW(DEBUG3, 0xf00e0431); ACCW(NV10_DEBUG4, 0x00008000); ACCW(NV25_WHAT0, 0xf04b1f36); ACCW(NV20_WHAT3, 0x1002d888); ACCW(NV25_WHAT2, 0x62ff007f); break; case NV20A: /* init some function blocks, but most is unknown.. 
*/ ACCW(DEBUG1, 0x00118700); ACCW(DEBUG3, 0xf20e0431); ACCW(NV10_DEBUG4, 0x00000000); ACCW(NV20_WHAT1, 0x00000040); if (si->ps.card_type < NV25) { ACCW(NV20_WHAT2, 0x00080000); ACCW(NV10_DEBUG5, 0x00000005); ACCW(NV20_WHAT3, 0x45caa208); ACCW(NV20_WHAT4, 0x24000000); ACCW(NV20_WHAT5, 0x00000040); /* copy some fixed RAM(?) configuration info(?) to some indexed registers: */ /* b16-24 is select; b2-13 is adress in 32-bit words */ ACCW(RDI_INDEX, 0x00e00038); /* data is 32-bit */ ACCW(RDI_DATA, 0x00000030); /* copy some fixed RAM(?) configuration info(?) to some indexed registers: */ /* b16-24 is select; b2-13 is adress in 32-bit words */ ACCW(RDI_INDEX, 0x00e10038); /* data is 32-bit */ ACCW(RDI_DATA, 0x00000030); } else { ACCW(NV25_WHAT1, 0x00080000); ACCW(NV25_WHAT0, 0x304b1fb6); ACCW(NV20_WHAT3, 0x18b82880); ACCW(NV20_WHAT4, 0x44000000); ACCW(NV20_WHAT5, 0x40000080); ACCW(NV25_WHAT2, 0x000000ff); } break; } /* NV20A, NV30A and NV40A: */ /* copy tile setup stuff from previous setup 'source' to acc engine * (pattern colorRAM?) 
*/ if ((si->ps.card_type <= NV40) || (si->ps.card_type == NV45)) { for (cnt = 0; cnt < 32; cnt++) { /* copy NV10_FBTIL0AD upto/including NV10_FBTIL7ST */ NV_REG32(NVACC_NV20_WHAT0 + (cnt << 2)) = NV_REG32(NVACC_NV10_FBTIL0AD + (cnt << 2)); /* copy NV10_FBTIL0AD upto/including NV10_FBTIL7ST */ NV_REG32(NVACC_NV20_2_WHAT0 + (cnt << 2)) = NV_REG32(NVACC_NV10_FBTIL0AD + (cnt << 2)); } } else { /* NV41, 43, 44, G70 and later */ if (si->ps.card_type >= G70) { for (cnt = 0; cnt < 60; cnt++) { /* copy NV41_FBTIL0AD upto/including G70_FBTILEST */ NV_REG32(NVACC_NV41_WHAT0 + (cnt << 2)) = NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2)); /* copy NV41_FBTIL0AD upto/including G70_FBTILEST */ NV_REG32(NVACC_NV20_2_WHAT0 + (cnt << 2)) = NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2)); } } else { /* NV41, 43, 44 */ for (cnt = 0; cnt < 48; cnt++) { /* copy NV41_FBTIL0AD upto/including NV41_FBTILBST */ NV_REG32(NVACC_NV20_WHAT0 + (cnt << 2)) = NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2)); if (si->ps.card_type != NV44) { /* copy NV41_FBTIL0AD upto/including NV41_FBTILBST */ NV_REG32(NVACC_NV20_2_WHAT0 + (cnt << 2)) = NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2)); } } } } if (si->ps.card_arch >= NV40A) { if ((si->ps.card_type == NV40) || (si->ps.card_type == NV45)) { /* copy some RAM configuration info(?) */ ACCW(NV20_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); ACCW(NV20_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); ACCW(NV40_WHAT_T2, NV_REG32(NV32_PFB_CONFIG_0)); ACCW(NV40_WHAT_T3, NV_REG32(NV32_PFB_CONFIG_1)); /* setup location of active screen in framebuffer */ ACCW(NV20_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); ACCW(NV20_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); /* setup accesible card memory range */ ACCW(NV20_BLIMIT6, (si->ps.memory_size - 1)); ACCW(NV20_BLIMIT7, (si->ps.memory_size - 1)); } else { /* NV41, 43, 44, G70 and later */ /* copy some RAM configuration info(?) 
*/ if (si->ps.card_type >= G70) { ACCW(G70_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); ACCW(G70_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); } else { /* NV41, 43, 44 */ ACCW(NV40P_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); ACCW(NV40P_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); } ACCW(NV40P_WHAT_T2, NV_REG32(NV32_PFB_CONFIG_0)); ACCW(NV40P_WHAT_T3, NV_REG32(NV32_PFB_CONFIG_1)); /* setup location of active screen in framebuffer */ ACCW(NV40P_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); ACCW(NV40P_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); /* setup accesible card memory range */ ACCW(NV40P_BLIMIT6, (si->ps.memory_size - 1)); ACCW(NV40P_BLIMIT7, (si->ps.memory_size - 1)); } } else /* NV20A and NV30A: */ { /* copy some RAM configuration info(?) */ ACCW(NV20_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); ACCW(NV20_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); /* copy some RAM configuration info(?) to some indexed registers: */ /* b16-24 is select; b2-13 is adress in 32-bit words */ ACCW(RDI_INDEX, 0x00ea0000); /* data is 32-bit */ ACCW(RDI_DATA, NV_REG32(NV32_PFB_CONFIG_0)); /* b16-24 is select; b2-13 is adress in 32-bit words */ ACCW(RDI_INDEX, 0x00ea0004); /* data is 32-bit */ ACCW(RDI_DATA, NV_REG32(NV32_PFB_CONFIG_1)); /* setup location of active screen in framebuffer */ ACCW(NV20_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); ACCW(NV20_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); /* setup accesible card memory range */ ACCW(NV20_BLIMIT6, (si->ps.memory_size - 1)); ACCW(NV20_BLIMIT7, (si->ps.memory_size - 1)); } /* NV20A, NV30A and NV40A: */ /* setup some acc engine tile stuff */ ACCW(NV10_TIL2AD, 0x00000000); ACCW(NV10_TIL0ED, 0xffffffff); } /* all cards: */ /* setup clipping: rect size is 32768 x 32768, probably max. setting */ /* note: * can also be done via the NV_IMAGE_BLACK_RECTANGLE engine command. 
*/ ACCW(ABS_UCLP_XMIN, 0x00000000); ACCW(ABS_UCLP_YMIN, 0x00000000); ACCW(ABS_UCLP_XMAX, 0x00007fff); ACCW(ABS_UCLP_YMAX, 0x00007fff); /* setup sync parameters for NV12_IMAGE_BLIT command for the current mode: * values given are CRTC vertical counter limit values. The NV12 command will wait * for the specified's CRTC's vertical counter to be in between the given values */ if (si->ps.card_type >= NV11) { ACCW(NV11_CRTC_LO, si->dm.timing.v_display - 1); ACCW(NV11_CRTC_HI, si->dm.timing.v_display + 1); } /*** PFIFO ***/ /* (setup caches) */ /* disable caches reassign */ ACCW(PF_CACHES, 0x00000000); /* PFIFO mode: channel 0 is in DMA mode, channels 1 - 32 are in PIO mode */ ACCW(PF_MODE, 0x00000001); /* cache1 push0 access disabled */ ACCW(PF_CACH1_PSH0, 0x00000000); /* cache1 pull0 access disabled */ ACCW(PF_CACH1_PUL0, 0x00000000); /* cache1 push1 mode = DMA */ if (si->ps.card_arch >= NV40A) ACCW(PF_CACH1_PSH1, 0x00010000); else ACCW(PF_CACH1_PSH1, 0x00000100); /* cache1 DMA Put offset = 0 (b2-28) */ ACCW(PF_CACH1_DMAP, 0x00000000); /* cache1 DMA Get offset = 0 (b2-28) */ ACCW(PF_CACH1_DMAG, 0x00000000); /* cache1 DMA instance adress = $114e (b0-15); * instance being b4-19 with baseadress NV_PRAMIN_CTX_0 (0x00700000). */ /* note: * should point to a DMA definition in CTX register space (which is sort of RAM). * This define tells the engine where the DMA cmd buffer is and what it's size is. * Inside that cmd buffer you'll find the actual issued engine commands. */ if (si->ps.card_arch >= NV40A) ACCW(PF_CACH1_DMAI, 0x00001150); else //2007 3d test.. ACCW(PF_CACH1_DMAI, 0x0000114e); /* cache0 push0 access disabled */ ACCW(PF_CACH0_PSH0, 0x00000000); /* cache0 pull0 access disabled */ ACCW(PF_CACH0_PUL0, 0x00000000); /* RAM HT (hash table) baseadress = $10000 (b4-8), size = 4k, * search = 128 (is byte offset between hash 'sets') */ /* note: * so HT base is $00710000, last is $00710fff. 
* In this space you define the engine command handles (HT_HANDL_XX), which * in turn points to the defines in CTX register space (which is sort of RAM) */ ACCW(PF_RAMHT, 0x03000100); /* RAM FC baseadress = $11000 (b3-8) (size is fixed to 0.5k(?)) */ /* note: * so FC base is $00711000, last is $007111ff. (not used?) */ ACCW(PF_RAMFC, 0x00000110); /* RAM RO baseadress = $11200 (b1-8), size = 0.5k */ /* note: * so RO base is $00711200, last is $007113ff. (not used?) */ /* note also: * This means(?) the PRAMIN CTX registers are accessible from base $00711400. */ ACCW(PF_RAMRO, 0x00000112); /* PFIFO size: ch0-15 = 512 bytes, ch16-31 = 124 bytes */ ACCW(PF_SIZE, 0x0000ffff); /* cache1 hash instance = $ffff (b0-15) */ ACCW(PF_CACH1_HASH, 0x0000ffff); /* disable all PFIFO INTs */ ACCW(PF_INTEN, 0x00000000); /* reset all PFIFO INT status bits */ ACCW(PF_INTSTAT, 0xffffffff); /* cache0 pull0 engine = acceleration engine (graphics) */ ACCW(PF_CACH0_PUL1, 0x00000001); /* cache1 DMA control: disable some stuff */ ACCW(PF_CACH1_DMAC, 0x00000000); /* cache1 engine 0 upto/including 7 is software (could also be graphics or DVD) */ ACCW(PF_CACH1_ENG, 0x00000000); /* cache1 DMA fetch: trigger at 128 bytes, size is 32 bytes, max requests is 15, * use little endian */ ACCW(PF_CACH1_DMAF, 0x000f0078); /* cache1 DMA push: b0 = 1: access is enabled */ ACCW(PF_CACH1_DMAS, 0x00000001); /* cache1 push0 access enabled */ ACCW(PF_CACH1_PSH0, 0x00000001); /* cache1 pull0 access enabled */ ACCW(PF_CACH1_PUL0, 0x00000001); /* cache1 pull1 engine = acceleration engine (graphics) */ ACCW(PF_CACH1_PUL1, 0x00000001); /* enable PFIFO caches reassign */ ACCW(PF_CACHES, 0x00000001); /* setup 3D specifics */ nv_init_for_3D_dma(); /*** init acceleration engine command info ***/ /* set object handles */ /* note: * probably depending on some other setup, there are 8 or 32 FIFO channels * available. Assuming the current setup only has 8 channels because the 'rest' * isn't setup here... 
*/ si->engine.fifo.handle[0] = NV_ROP5_SOLID; si->engine.fifo.handle[1] = NV_IMAGE_BLACK_RECTANGLE; si->engine.fifo.handle[2] = NV_IMAGE_PATTERN; si->engine.fifo.handle[3] = NV4_SURFACE; /* NV10_CONTEXT_SURFACES_2D is identical */ si->engine.fifo.handle[4] = NV_IMAGE_BLIT; si->engine.fifo.handle[5] = NV4_GDI_RECTANGLE_TEXT; si->engine.fifo.handle[6] = NV4_CONTEXT_SURFACES_ARGB_ZS;//NV1_RENDER_SOLID_LIN; si->engine.fifo.handle[7] = NV4_DX5_TEXTURE_TRIANGLE; /* preset no FIFO channels assigned to cmd's */ for (cnt = 0; cnt < 0x20; cnt++) { si->engine.fifo.ch_ptr[cnt] = 0; } /* set handle's pointers to their assigned FIFO channels */ /* note: * b0-1 aren't used as adressbits. Using b0 to indicate a valid pointer. */ for (cnt = 0; cnt < 0x08; cnt++) { si->engine.fifo.ch_ptr[(si->engine.fifo.handle[cnt])] = (0x00000001 + (cnt * 0x00002000)); } /*** init DMA command buffer info ***/ if (si->ps.card_arch >= NV40A) //main mem DMA buf on pre-NV40 { si->dma_buffer = (void *)((char *)si->framebuffer + ((si->ps.memory_size - 1) & 0xffff8000)); } LOG(4, ("ACC_DMA: command buffer is at adress $%p\n", si->dma_buffer)); /* we have issued no DMA cmd's to the engine yet */ si->engine.dma.put = 0; /* the current first free adress in the DMA buffer is at offset 0 */ si->engine.dma.current = 0; /* the DMA buffer can hold 8k 32-bit words (it's 32kb in size), * or 256k 32-bit words (1Mb in size) dependant on architecture (for now) */ /* note: * one word is reserved at the end of the DMA buffer to be able to instruct the * engine to do a buffer wrap-around! * (DMA opcode 'noninc method': issue word $20000000.) */ if (si->ps.card_arch < NV40A) si->engine.dma.max = ((1 * 1024 * 1024) >> 2) - 1; else si->engine.dma.max = 8192 - 1; /* note the current free space we have left in the DMA buffer */ si->engine.dma.free = si->engine.dma.max - si->engine.dma.current; /*** init FIFO via DMA command buffer. 
***/ /* wait for room in fifo for new FIFO assigment cmds if needed: */ if (si->ps.card_arch >= NV40A) { if (nv_acc_fifofree_dma(12) != B_OK) return B_ERROR; } else { if (nv_acc_fifofree_dma(16) != B_OK) return B_ERROR; } /* program new FIFO assignments */ /* Raster OPeration: */ nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH0, si->engine.fifo.handle[0]); /* Clip: */ nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH1, si->engine.fifo.handle[1]); /* Pattern: */ nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH2, si->engine.fifo.handle[2]); /* 2D Surfaces: */ nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH3, si->engine.fifo.handle[3]); /* Blit: */ nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH4, si->engine.fifo.handle[4]); /* Bitmap: */ nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH5, si->engine.fifo.handle[5]); if (si->ps.card_arch < NV40A) { /* 3D surfaces: (3D related only) */ nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH6, si->engine.fifo.handle[6]); /* Textured Triangle: (3D only) */ nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH7, si->engine.fifo.handle[7]); } /*** Set pixel width ***/ switch(si->dm.space) { case B_CMAP8: surf_depth = 0x00000001; cmd_depth = 0x00000003; break; case B_RGB15_LITTLE: case B_RGB16_LITTLE: surf_depth = 0x00000004; cmd_depth = 0x00000001; break; case B_RGB32_LITTLE: case B_RGBA32_LITTLE: surf_depth = 0x00000006; cmd_depth = 0x00000003; break; default: LOG(8,("ACC_DMA: init, invalid bit depth\n")); return B_ERROR; } /* wait for room in fifo for surface setup cmd if needed */ if (nv_acc_fifofree_dma(5) != B_OK) return B_ERROR; /* now setup 2D surface (writing 5 32bit words) */ nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 4); ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = surf_depth; /* Format */ /* setup screen pitch */ ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = ((si->fbc.bytes_per_row & 0x0000ffff) | (si->fbc.bytes_per_row << 16)); /* Pitch */ /* setup screen location */ ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer); /* 
OffsetSource */
	((uint32*)(si->dma_buffer))[si->engine.dma.current++] =
		((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer); /* OffsetDest */

	/* wait for room in fifo for pattern colordepth setup cmd if needed */
	if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR;
	/* set pattern colordepth (writing 2 32bit words) */
	nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETCOLORFORMAT, 1);
	((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */

	/* wait for room in fifo for bitmap colordepth setup cmd if needed */
	if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR;
	/* set bitmap colordepth (writing 2 32bit words) */
	nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_SETCOLORFORMAT, 1);
	((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */

	/* Load our pattern into the engine: */
	/* wait for room in fifo for pattern cmd if needed. */
	if (nv_acc_fifofree_dma(7) != B_OK) return B_ERROR;
	/* now setup pattern (writing 7 32bit words) */
	nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETSHAPE, 1);
	((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000000; /* SetShape: 0 = 8x8, 1 = 64x1, 2 = 1x64 */
	nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETCOLOR0, 4);
	((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetColor0 */
	((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetColor1 */
	((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetPattern[0] */
	((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetPattern[1] */

	/* tell the engine to fetch and execute all (new) commands in the DMA buffer */
	nv_start_dma();

	return B_OK;
}


/* Program the PGRAPH registers and the transform 'pipe' for 3D use.
 * Everything in here is skipped on cards with an architecture below NV10A.
 * The routine only writes registers (via ACCW); it issues no DMA commands. */
static void nv_init_for_3D_dma(void)
{
	/* setup PGRAPH unknown registers and modify (pre-cleared) pipe stuff for 3D use */
	if (si->ps.card_arch >= NV10A)
	{
		/* setup unknown PGRAPH stuff */
		ACCW(PGWHAT_00, 0x00000000);
		ACCW(PGWHAT_01, 0x00000000);
		ACCW(PGWHAT_02, 0x00000000);
		ACCW(PGWHAT_03, 0x00000000);

		ACCW(PGWHAT_04, 0x00001000);
		ACCW(PGWHAT_05, 0x00001000);
		ACCW(PGWHAT_06, 0x4003ff80);

		ACCW(PGWHAT_07, 0x00000000);
		ACCW(PGWHAT_08, 0x00000000);
		ACCW(PGWHAT_09, 0x00000000);
		ACCW(PGWHAT_0A, 0x00000000);
		ACCW(PGWHAT_0B, 0x00000000);

		ACCW(PGWHAT_0C, 0x00080008);
		ACCW(PGWHAT_0D, 0x00080008);

		ACCW(PGWHAT_0E, 0x00000000);
		ACCW(PGWHAT_0F, 0x00000000);
		ACCW(PGWHAT_10, 0x00000000);
		ACCW(PGWHAT_11, 0x00000000);
		ACCW(PGWHAT_12, 0x00000000);
		ACCW(PGWHAT_13, 0x00000000);
		ACCW(PGWHAT_14, 0x00000000);
		ACCW(PGWHAT_15, 0x00000000);
		ACCW(PGWHAT_16, 0x00000000);
		ACCW(PGWHAT_17, 0x00000000);
		ACCW(PGWHAT_18, 0x00000000);

		ACCW(PGWHAT_19, 0x10000000);

		ACCW(PGWHAT_1A, 0x00000000);
		ACCW(PGWHAT_1B, 0x00000000);
		ACCW(PGWHAT_1C, 0x00000000);
		ACCW(PGWHAT_1D, 0x00000000);
		ACCW(PGWHAT_1E, 0x00000000);
		ACCW(PGWHAT_1F, 0x00000000);
		ACCW(PGWHAT_20, 0x00000000);
		ACCW(PGWHAT_21, 0x00000000);

		ACCW(PGWHAT_22, 0x08000000);

		ACCW(PGWHAT_23, 0x00000000);
		ACCW(PGWHAT_24, 0x00000000);
		ACCW(PGWHAT_25, 0x00000000);
		ACCW(PGWHAT_26, 0x00000000);

		ACCW(PGWHAT_27, 0x4b7fffff);

		ACCW(PGWHAT_28, 0x00000000);
		ACCW(PGWHAT_29, 0x00000000);
		ACCW(PGWHAT_2A, 0x00000000);

		/* setup window clipping */
		/* b0-11 = min; b16-27 = max.
		 * note:
		 * probably two's complement values, so setting to max range here:
		 * which would be -2048 upto/including +2047. */
		/* horizontal */
		ACCW(WINCLIP_H_0, 0x07ff0800);
		ACCW(WINCLIP_H_1, 0x07ff0800);
		ACCW(WINCLIP_H_2, 0x07ff0800);
		ACCW(WINCLIP_H_3, 0x07ff0800);
		ACCW(WINCLIP_H_4, 0x07ff0800);
		ACCW(WINCLIP_H_5, 0x07ff0800);
		ACCW(WINCLIP_H_6, 0x07ff0800);
		ACCW(WINCLIP_H_7, 0x07ff0800);
		/* vertical */
		ACCW(WINCLIP_V_0, 0x07ff0800);
		ACCW(WINCLIP_V_1, 0x07ff0800);
		ACCW(WINCLIP_V_2, 0x07ff0800);
		ACCW(WINCLIP_V_3, 0x07ff0800);
		ACCW(WINCLIP_V_4, 0x07ff0800);
		ACCW(WINCLIP_V_5, 0x07ff0800);
		ACCW(WINCLIP_V_6, 0x07ff0800);
		ACCW(WINCLIP_V_7, 0x07ff0800);

		/* setup (initialize) pipe:
		 * needed to get valid 3D rendering on (at least) NV1x cards. Without this
		 * those cards produce rubbish instead of 3D, although the engine itself keeps
		 * running and 2D stays OK. */

		/* set eyetype to local, lighting etc. is off */
		ACCW(NV10_XFMOD0, 0x10000000);
		/* disable all lights */
		ACCW(NV10_XFMOD1, 0x00000000);

		/* note: upon writing data into the PIPEDAT register, the PIPEADR is
		 * probably auto-incremented! */
		/* (pipe address = b2-16, pipe data = b0-31) */
		/* note: pipe addresses IGRAPH registers! */
		ACCW(NV10_PIPEADR, 0x00006740);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x3f800000);

		ACCW(NV10_PIPEADR, 0x00006750);
		ACCW(NV10_PIPEDAT, 0x40000000);
		ACCW(NV10_PIPEDAT, 0x40000000);
		ACCW(NV10_PIPEDAT, 0x40000000);
		ACCW(NV10_PIPEDAT, 0x40000000);

		ACCW(NV10_PIPEADR, 0x00006760);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x00006770);
		ACCW(NV10_PIPEDAT, 0xc5000000);
		ACCW(NV10_PIPEDAT, 0xc5000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x00006780);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x000067a0);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x3f800000);

		ACCW(NV10_PIPEADR, 0x00006ab0);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x3f800000);

		ACCW(NV10_PIPEADR, 0x00006ac0);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x00006c10);
		ACCW(NV10_PIPEDAT, 0xbf800000);

		ACCW(NV10_PIPEADR, 0x00007030);
		ACCW(NV10_PIPEDAT, 0x7149f2ca);

		ACCW(NV10_PIPEADR, 0x00007040);
		ACCW(NV10_PIPEDAT, 0x7149f2ca);

		ACCW(NV10_PIPEADR, 0x00007050);
		ACCW(NV10_PIPEDAT, 0x7149f2ca);

		ACCW(NV10_PIPEADR, 0x00007060);
		ACCW(NV10_PIPEDAT, 0x7149f2ca);

		ACCW(NV10_PIPEADR, 0x00007070);
		ACCW(NV10_PIPEDAT, 0x7149f2ca);

		ACCW(NV10_PIPEADR, 0x00007080);
		ACCW(NV10_PIPEDAT, 0x7149f2ca);

		ACCW(NV10_PIPEADR, 0x00007090);
		ACCW(NV10_PIPEDAT, 0x7149f2ca);

		ACCW(NV10_PIPEADR, 0x000070a0);
		ACCW(NV10_PIPEDAT, 0x7149f2ca);

		ACCW(NV10_PIPEADR, 0x00006a80);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x3f800000);

		ACCW(NV10_PIPEADR, 0x00006aa0);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		/* select primitive type that will be drawn (tri's) */
		ACCW(NV10_PIPEADR, 0x00000040);
		ACCW(NV10_PIPEDAT, 0x00000005);

		ACCW(NV10_PIPEADR, 0x00006400);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x4b7fffff);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x00006410);
		ACCW(NV10_PIPEDAT, 0xc5000000);
		ACCW(NV10_PIPEDAT, 0xc5000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x00006420);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x00006430);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x000064c0);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x477fffff);
		ACCW(NV10_PIPEDAT, 0x3f800000);

		ACCW(NV10_PIPEADR, 0x000064d0);
		ACCW(NV10_PIPEDAT, 0xc5000000);
		ACCW(NV10_PIPEDAT, 0xc5000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x000064e0);
		ACCW(NV10_PIPEDAT, 0xc4fff000);
		ACCW(NV10_PIPEDAT, 0xc4fff000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x000064f0);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		/* turn lighting on */
		ACCW(NV10_XFMOD0, 0x30000000);
		/* set light 1 to infinite type, other lights remain off */
		ACCW(NV10_XFMOD1, 0x00000004);

		/* Z-buffer state is:
		 * initialized, set to: 'fixed point' (integer?); Z-buffer; 16bits depth */
		/* note:
		 * other options possible are: floating point; 24bits depth; W-buffer */
		ACCW(GLOB_STAT_0, 0x10000000);
		/* set DMA instance 2 and 3 to be invalid */
		ACCW(GLOB_STAT_1, 0x00000000);
	}
}

/* Hand all command words queued since the previous call over to the engine.
 * If dma.current differs from dma.put: dma.put is set to dma.current, a
 * flushing read is done, and dma.put (converted to a byte offset by << 2) is
 * written to the DMAPUT register. If nothing new was queued this is a no-op. */
static void nv_start_dma(void)
{
	/* only a sink for the flushing reads below; its value is never used */
	uint32 dummy;

	if (si->engine.dma.current != si->engine.dma.put)
	{
		si->engine.dma.put = si->engine.dma.current;
		/* flush used caches so we know for sure the DMA cmd buffer received all data. */
		if (si->ps.card_arch < NV40A)
		{
			/* some CPU's support out-of-order processing (WinChip/Cyrix). Flush them. */
			/* note: this inline assembly is x86 (32-bit, %esp) specific */
			__asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory");
			/* read a non-cached address to flush the cache */
			dummy = ACCR(STATUS);
		}
		else
		{
			/* dummy read the first address of the framebuffer to flush MTRR-WC buffers */
			dummy = *((volatile uint32 *)(si->framebuffer));
		}

		/* actually start DMA to execute all commands now in buffer */
		/* note:
		 * it doesn't matter which FIFO channel's DMA registers we access, they are in
		 * fact all the same set. It also doesn't matter if the channel was assigned a
		 * command or not. */
		/* note also:
		 * NV_GENERAL_DMAPUT is a write-only register on some cards (confirmed NV11). */
		NV_REG32(NVACC_FIFO + NV_GENERAL_DMAPUT) = (si->engine.dma.put << 2);
	}
}

/* this routine does not check the engine's internal hardware FIFO, but the DMA
 * command buffer. You can see this as a FIFO as well, that feeds the hardware FIFO.
 * The hardware FIFO state is checked by the DMA hardware automatically. */
/* cmd_size is the number of 32-bit words the caller wants to queue.
 * Returns B_ERROR once the file-scope stall counter 'err' has reached 3,
 * B_OK otherwise.
 * NOTE(review): when the wait loop times out while 'err' is still below 3
 * after being incremented, B_OK is returned although dma.free may still be
 * smaller than cmd_size - confirm this is intended. */
static status_t nv_acc_fifofree_dma(uint16 cmd_size)
{
	uint32 dmaget;
	/* we'd better check for timeouts on the DMA engine as it's theoretically
	 * breakable by malfunctioning software */
	uint16 cnt = 0;

	/* check if the DMA buffer has enough room for the command.
	 * note:
	 * engine.dma.free is 'cached' */
	while ((si->engine.dma.free < cmd_size) && (cnt < 10000) && (err < 3))
	{
		/* see where the engine is currently fetching from the buffer */
		/* note:
		 * read this only once in the code as accessing registers is relatively slow */
		/* note also:
		 * it doesn't matter which FIFO channel's DMA registers we access, they are in
		 * fact all the same set. It also doesn't matter if the channel was assigned a
		 * command or not. */
		/* the register holds a byte offset; >> 2 converts it to a word index */
		dmaget = ((NV_REG32(NVACC_FIFO + NV_GENERAL_DMAGET)) >> 2);

		/* update timeout counter: on NV11 on a Pentium4 2.8Ghz max reached count
		 * using BeRoMeter 1.2.6 was about 600; so counting 10000 before generating
		 * a timeout should definitely do it. Snooze()-ing cannot be done without a
		 * serious speed penalty, even if done for only 1 microSecond. */
		cnt++;

		/* where's the engine fetching viewed from us issuing? */
		if (si->engine.dma.put >= dmaget)
		{
			/* engine is fetching 'behind us', the last piece of the buffer is free */

			/* note the 'updated' free space we have in the DMA buffer */
			si->engine.dma.free = si->engine.dma.max - si->engine.dma.current;
			/* if it's enough after all we exit this routine immediately. Else: */
			if (si->engine.dma.free < cmd_size)
			{
				/* not enough room left, so instruct DMA engine to reset the buffer
				 * when it's reaching the end of it */
				((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x20000000;
				/* reset our buffer pointer, so new commands will be placed at the
				 * beginning of the buffer. */
				si->engine.dma.current = 0;
				/* tell the engine to fetch the remaining command(s) in the DMA buffer
				 * that were not executed before. */
				nv_start_dma();

				/* NOW the engine is fetching 'in front of us', so the first piece
				 * of the buffer is free */

				/* note the updated current free space we have in the DMA buffer */
				si->engine.dma.free = dmaget - si->engine.dma.current;
				/* mind this pitfall:
				 * Leave some room between where the engine is fetching and where we
				 * put new commands. Otherwise the engine will crash on heavy loads.
				 * A crash can be forced best in 640x480x32 mode with BeRoMeter 1.2.6.
				 * (confirmed on NV11 and NV43 with less than 256 words forced freespace.)
				 * Note:
				 * The engine is DMA triggered for fetching chunks every 128 bytes,
				 * maybe this is the reason for this behaviour.
				 * Note also:
				 * it looks like the space that needs to be kept free is coupled
				 * with the size of the DMA buffer. */
				if (si->engine.dma.free < 256)
					si->engine.dma.free = 0;
				else
					si->engine.dma.free -= 256;
			}
		}
		else
		{
			/* engine is fetching 'in front of us', so the first piece of the buffer
			 * is free */

			/* note the updated current free space we have in the DMA buffer */
			si->engine.dma.free = dmaget - si->engine.dma.current;
			/* mind this pitfall:
			 * Leave some room between where the engine is fetching and where we
			 * put new commands. Otherwise the engine will crash on heavy loads.
			 * A crash can be forced best in 640x480x32 mode with BeRoMeter 1.2.6.
			 * (confirmed on NV11 and NV43 with less than 256 words forced freespace.)
			 * Note:
			 * The engine is DMA triggered for fetching chunks every 128 bytes,
			 * maybe this is the reason for this behaviour.
			 * Note also:
			 * it looks like the space that needs to be kept free is coupled
			 * with the size of the DMA buffer. */
			if (si->engine.dma.free < 256)
				si->engine.dma.free = 0;
			else
				si->engine.dma.free -= 256;
		}
	}

	/* log timeout if we had one */
	if (cnt == 10000)
	{
		if (err < 3) err++;
		LOG(4,("ACC_DMA: fifofree; DMA timeout #%d, engine trouble!\n", err));
	}

	/* we must make the acceleration routines abort or the driver will hang! */
	if (err >= 3) return B_ERROR;

	return B_OK;
}

/* Queue one command header word in the DMA buffer.
 * cmd selects the entry of engine.fifo.ch_ptr[] to use, offset is added to
 * that entry, and size is the number of argument words that follow the header.
 * dma.free is reduced by size + 1 here, i.e. for the header AND the arguments;
 * the argument words themselves are written by the caller. */
static void nv_acc_cmd_dma(uint32 cmd, uint16 offset, uint16 size)
{
	/* NV_FIFO_DMA_OPCODE: set number of cmd words (b18 - 28); set FIFO offset for
	 * first cmd word (b2 - 15); set DMA opcode = method (b29 - 31).
	 * a 'NOP' is the opcode word $00000000. */
	/* note:
	 * possible DMA opcodes:
	 * b'000' is 'method' (execute cmd);
	 * b'001' is 'jump';
	 * b'002' is 'noninc method' (execute buffer wrap-around);
	 * b'003' is 'call': return is executed by opcode word $00020000 (b17 = 1). */
	/* note also:
	 * this system uses auto-increments for the FIFO offset addresses. Make sure
	 * to set a new address if a gap exists between the previous one and the new one. */
	/* the 0xfffc mask also drops b0 of ch_ptr[], which is only a 'valid' marker */
	((uint32*)(si->dma_buffer))[si->engine.dma.current++] = ((size << 18) |
		((si->engine.fifo.ch_ptr[cmd] + offset) & 0x0000fffc));

	/* space left after issuing the current command is the cmd AND its arguments less */
	si->engine.dma.free -= (size + 1);
}

/* Queue the two words that assign engine object 'handle' to FIFO channel 'ch':
 * a header word ((1 << 18) | ch) followed by (0x80000000 | handle).
 * dma.free is reduced by 2; no room check is done here. */
static void nv_acc_set_ch_dma(uint16 ch, uint32 handle)
{
	/* issue FIFO channel assign cmd */
	((uint32*)(si->dma_buffer))[si->engine.dma.current++] = ((1 << 18) | ch);
	/* set new assignment */
	((uint32*)(si->dma_buffer))[si->engine.dma.current++] = (0x80000000 | handle);

	/* space left after issuing the current command is the cmd AND its arguments less */
	si->engine.dma.free -= 2;
}

/* note:
 * switching fifo channel assignments this way has no noticeable slowdown:
 * measured 0.2% with Quake2. */
/* Make sure each of the seven engine commands tested below has a FIFO channel.
 * If any of them has a zero ch_ptr[] entry, channels 0-6 are re-assigned to
 * exactly these seven commands and the assignment is queued and started.
 * Returns silently, with the tables already updated, if no buffer room could
 * be obtained. */
void nv_acc_assert_fifo_dma(void)
{
	/* does every engine cmd this accelerant needs have a FIFO channel? */
	//fixme: can probably be optimized for both speed and channel selection...
	if (!si->engine.fifo.ch_ptr[NV_ROP5_SOLID] ||
		!si->engine.fifo.ch_ptr[NV_IMAGE_BLACK_RECTANGLE] ||
		!si->engine.fifo.ch_ptr[NV_IMAGE_PATTERN] ||
		!si->engine.fifo.ch_ptr[NV4_SURFACE] ||
		!si->engine.fifo.ch_ptr[NV_IMAGE_BLIT] ||
		!si->engine.fifo.ch_ptr[NV4_GDI_RECTANGLE_TEXT] ||
		!si->engine.fifo.ch_ptr[NV_SCALED_IMAGE_FROM_MEMORY])
	{
		uint16 cnt;

		/* free the FIFO channels we want from the currently assigned cmd's */
		si->engine.fifo.ch_ptr[si->engine.fifo.handle[0]] = 0;
		si->engine.fifo.ch_ptr[si->engine.fifo.handle[1]] = 0;
		si->engine.fifo.ch_ptr[si->engine.fifo.handle[2]] = 0;
		si->engine.fifo.ch_ptr[si->engine.fifo.handle[3]] = 0;
		si->engine.fifo.ch_ptr[si->engine.fifo.handle[4]] = 0;
		si->engine.fifo.ch_ptr[si->engine.fifo.handle[5]] = 0;
		si->engine.fifo.ch_ptr[si->engine.fifo.handle[6]] = 0;

		/* set new object handles */
		si->engine.fifo.handle[0] = NV_ROP5_SOLID;
		si->engine.fifo.handle[1] = NV_IMAGE_BLACK_RECTANGLE;
		si->engine.fifo.handle[2] = NV_IMAGE_PATTERN;
		si->engine.fifo.handle[3] = NV4_SURFACE;
		si->engine.fifo.handle[4] = NV_IMAGE_BLIT;
		si->engine.fifo.handle[5] = NV4_GDI_RECTANGLE_TEXT;
		si->engine.fifo.handle[6] = NV_SCALED_IMAGE_FROM_MEMORY;

		/* set handle's pointers to their assigned FIFO channels */
		/* note:
		 * b0-1 aren't used as addressbits. Using b0 to indicate a valid pointer. */
		/* handle[7] is not changed above; this loop re-marks its channel as well */
		for (cnt = 0; cnt < 0x08; cnt++)
		{
			si->engine.fifo.ch_ptr[(si->engine.fifo.handle[cnt])] =
				(0x00000001 + (cnt * 0x00002000));
		}

		/* wait for room in fifo for new FIFO assignment cmds if needed. */
		/* (7 assignments of 2 words each) */
		if (nv_acc_fifofree_dma(14) != B_OK) return;

		/* program new FIFO assignments */
		/* Raster OPeration: */
		nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH0, si->engine.fifo.handle[0]);
		/* Clip: */
		nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH1, si->engine.fifo.handle[1]);
		/* Pattern: */
		nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH2, si->engine.fifo.handle[2]);
		/* 2D Surface: */
		nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH3, si->engine.fifo.handle[3]);
		/* Blit: */
		nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH4, si->engine.fifo.handle[4]);
		/* Bitmap: */
		nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH5, si->engine.fifo.handle[5]);
		/* Scaled and filtered Blit: */
		nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH6, si->engine.fifo.handle[6]);

		/* tell the engine to fetch and execute all (new) commands in the DMA buffer */
		nv_start_dma();
	}
}

/*
	note:
	moved acceleration 'top-level' routines to be integrated in the engine:
	it is costly to call the engine for every single function within a loop!
	(measured with BeRoMeter 1.2.6: upto 15% speed increase on all CPU's.)

	note also:
	splitting up each command list into sublists (see routines below) prevents
	a lot more nested calls, further increasing the speed with upto 70%.

	finally:
	sending the sublist to just one single engine command even further increases
	speed with upto another 10%. This can't be done for blits though, as this engine-
	command's hardware does not support multiple objects.
*/

/* screen to screen blit - i.e. move windows around and scroll within them. */
/* et is not used by this routine. list holds 'count' blits; each entry's width
 * and height get 1 added before they are sent to the engine. The routine
 * returns early, without setting the 3D reload flag, if no buffer room could
 * be obtained. */
void SCREEN_TO_SCREEN_BLIT_DMA(engine_token *et, blit_params *list, uint32 count)
{
	uint32 i = 0;
	uint16 subcnt;

	/*** init acc engine for blit function ***/
	/* ROP registers (Raster OPeration):
	 * wait for room in fifo for ROP cmd if needed. */
	if (nv_acc_fifofree_dma(2) != B_OK) return;
	/* now setup ROP (writing 2 32bit words) for GXcopy */
	nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1);
	((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xcc; /* SetRop5 */

	/*** do each blit ***/
	/* Note:
	 * blit-copy direction is determined inside nvidia hardware: no setup needed */
	while (count)
	{
		/* break up the list in sublists to minimize calls, while making sure long
		 * lists still get executed without trouble */
		subcnt = 32;
		if (count < 32) subcnt = count;
		count -= subcnt;

		/* wait for room in fifo for blit cmd if needed. */
		if (nv_acc_fifofree_dma(4 * subcnt) != B_OK) return;

		while (subcnt--)
		{
			/* now setup blit (writing 4 32bit words) */
			nv_acc_cmd_dma(NV_IMAGE_BLIT, NV_IMAGE_BLIT_SOURCEORG, 3);
			((uint32*)(si->dma_buffer))[si->engine.dma.current++] =
				(((list[i].src_top) << 16) | (list[i].src_left)); /* SourceOrg */
			((uint32*)(si->dma_buffer))[si->engine.dma.current++] =
				(((list[i].dest_top) << 16) | (list[i].dest_left)); /* DestOrg */
			((uint32*)(si->dma_buffer))[si->engine.dma.current++] =
				((((list[i].height) + 1) << 16) | ((list[i].width) + 1)); /* HeightWidth */

			i++;
		}

		/* tell the engine to fetch the commands in the DMA buffer that were not
		 * executed before. */
		nv_start_dma();
	}

	/* tell 3D add-ons that they should reload their rendering states and surfaces */
	si->engine.threeD.reload = 0xffffffff;
}

/* scaled and filtered screen to screen blit - i.e. video playback without overlay */
/* note: source and destination may not overlap. */
//fixme? checkout NV5 and NV10 version of cmd: faster?? (or is 0x77 a 'autoselect' version?)
/* Queue 'count' scaled, bilinear filtered blits inside the current desktop.
 *
 * et    - not used by this routine.
 * list  - array of 'count' entries; every width/height field gets 1 added
 *         before it is used.
 * count - number of entries in list.
 *
 * The source colorspace is taken from si->dm.space. The routine returns
 * without (further) queueing anything when that space is not handled below,
 * or when nv_acc_fifofree_dma() reports an error; in those cases the 3D
 * reload flag is not set. */
void SCREEN_TO_SCREEN_SCALED_FILTERED_BLIT_DMA(engine_token *et, scaled_blit_params *list, uint32 count)
{
	uint32 i = 0;
	uint16 subcnt;
	uint32 cmd_depth;
	uint8 bpp;

	/*** init acc engine for scaled filtered blit function ***/
	/* Set pixel width */
	switch(si->dm.space)
	{
	case B_RGB15_LITTLE:
		cmd_depth = 0x00000002;
		bpp = 2;
		break;
	case B_RGB16_LITTLE:
		cmd_depth = 0x00000007;
		bpp = 2;
		break;
	case B_RGB32_LITTLE:
	case B_RGBA32_LITTLE:
		cmd_depth = 0x00000004;
		bpp = 4;
		break;
	/* fixme sometime:
	 * we could do the spaces below if this function would be modified to be able
	 * to use a source outside of the desktop, i.e. using offscreen bitmaps... */
	case B_YCbCr422:
		cmd_depth = 0x00000005;
		bpp = 2;
		break;
	case B_YUV422:
		cmd_depth = 0x00000006;
		bpp = 2;
		break;
	default:
		/* note: this function does not support src or dest in the B_CMAP8 space! */
		//fixme: the NV10 version of this cmd supports B_CMAP8 src though... (checkout)
		LOG(8,("ACC_DMA: scaled_filtered_blit, invalid bit depth\n"));
		return;
	}

	/* modify surface depth settings for 15-bit colorspace so command works as intended */
	if (si->dm.space == B_RGB15_LITTLE)
	{
		/* wait for room in fifo for surface setup cmd if needed */
		if (nv_acc_fifofree_dma(2) != B_OK) return;
		/* now setup 2D surface (writing 1 32bit word) */
		nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 1);
		((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000002; /* Format */
	}

	/* TNT1 has fixed operation mode 'SRCcopy' while the rest can be programmed: */
	if (si->ps.card_type != NV04)
	{
		/* wait for room in fifo for cmds if needed.
		 * (3 words here plus the 2 words of the fill color cmd below) */
		if (nv_acc_fifofree_dma(5) != B_OK) return;
		/* now setup source bitmap colorspace */
		nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SETCOLORFORMAT, 2);
		((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */
		/* now setup operation mode to SRCcopy */
		((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000003; /* SetOperation */
	}
	else
	{
		/* wait for room in fifo for cmd if needed.
		 * (2 words here plus the 2 words of the fill color cmd below) */
		if (nv_acc_fifofree_dma(4) != B_OK) return;
		/* now setup source bitmap colorspace */
		nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SETCOLORFORMAT, 1);
		((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */
		/* TNT1 has fixed operation mode SRCcopy */
	}

	/* now setup fill color (writing 2 32bit words) */
	nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1);
	((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000000; /* Color1A */

	/*** do each blit ***/
	while (count)
	{
		/* break up the list in sublists to minimize calls, while making sure long
		 * lists still get executed without trouble */
		subcnt = 16;
		if (count < 16) subcnt = count;
		count -= subcnt;

		/* wait for room in fifo for blit cmd if needed. */
		if (nv_acc_fifofree_dma(12 * subcnt) != B_OK) return;

		while (subcnt--)
		{
			/* note:
			 * all size/position fields are converted to uint32 before they are
			 * shifted. Narrower fields would otherwise be promoted to (signed)
			 * int, and e.g. (src_width + 1) << 20 overflows that from a size of
			 * 2048 on. For all values that did not overflow the results are
			 * unchanged. (assumes the fields are unsigned - TODO confirm against
			 * the scaled_blit_params declaration) */

			/* now setup blit (writing 12 32bit words) */
			nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCEORG, 6);
			/* setup dest clipping ref for blit (not used) (b0-15 = left, b16-31 = top) */
			((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0; /* SourceOrg */
			/* setup dest clipping size for blit */
			((uint32*)(si->dma_buffer))[si->engine.dma.current++] =
				((((uint32)list[i].dest_height + 1) << 16) |
				((uint32)list[i].dest_width + 1)); /* SourceHeightWidth */
			/* setup destination location and size for blit */
			((uint32*)(si->dma_buffer))[si->engine.dma.current++] =
				((((uint32)list[i].dest_top) << 16) |
				((uint32)list[i].dest_left)); /* DestOrg */
			((uint32*)(si->dma_buffer))[si->engine.dma.current++] =
				((((uint32)list[i].dest_height + 1) << 16) |
				((uint32)list[i].dest_width + 1)); /* DestHeightWidth */
			//fixme: findout scaling limits... (although the current cmd interface doesn't support them.)
			((uint32*)(si->dma_buffer))[si->engine.dma.current++] =
				((((uint32)list[i].src_width + 1) << 20) /
				((uint32)list[i].dest_width + 1)); /* HorInvScale (in 12.20 format) */
			((uint32*)(si->dma_buffer))[si->engine.dma.current++] =
				((((uint32)list[i].src_height + 1) << 20) /
				((uint32)list[i].dest_height + 1)); /* VerInvScale (in 12.20 format) */

			nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCESIZE, 4);
			/* setup horizontal and vertical source (fetching) ends.
			 * note:
			 * horizontal granularity is 2 pixels, vertical granularity is 1 pixel.
			 * look at Matrox or Neomagic bes engines code for usage example. */
			//fixme: tested 15, 16 and 32-bit RGB depth, verify other depths...
			/* (the width is rounded up to the next even number here) */
			((uint32*)(si->dma_buffer))[si->engine.dma.current++] =
				((((uint32)list[i].src_height + 1) << 16) |
				((((uint32)list[i].src_width + 1) + 0x0001) & ~(uint32)0x0001)); /* SourceHeightWidth */
			/* setup source pitch (b0-15). Set 'format origin center' (b16-17) and
			 * select 'format interpolator foh (bilinear filtering)' (b24). */
			((uint32*)(si->dma_buffer))[si->engine.dma.current++] =
				(si->fbc.bytes_per_row | (1 << 16) | (1 << 24)); /* SourcePitch */

			/* setup source surface location */
			((uint32*)(si->dma_buffer))[si->engine.dma.current++] =
				((uint32)((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)) +
				(list[i].src_top * si->fbc.bytes_per_row) + (list[i].src_left * bpp); /* Offset */

			/* setup source start: first (sub)pixel contributing to output picture */
			/* note:
			 * clipping is not asked for.
			 * look at nVidia NV10+ bes engine code for usage example. */
			((uint32*)(si->dma_buffer))[si->engine.dma.current++] =
				0; /* SourceRef (b0-15 = hor, b16-31 = ver: both in 12.4 format) */

			i++;
		}

		/* tell the engine to fetch the commands in the DMA buffer that were not
		 * executed before. */
		nv_start_dma();
	}

	/* reset surface depth settings so the other engine commands works as intended */
	if (si->dm.space == B_RGB15_LITTLE)
	{
		/* wait for room in fifo for surface setup cmd if needed */
		if (nv_acc_fifofree_dma(2) != B_OK) return;
		/* now setup 2D surface (writing 1 32bit word) */
		nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 1);
		((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000004; /* Format */

		/* tell the engine to fetch the commands in the DMA buffer that were not
		 * executed before. */
		nv_start_dma();
	}

	/* tell 3D add-ons that they should reload their rendering states and surfaces */
	si->engine.threeD.reload = 0xffffffff;
}

/* scaled and filtered screen to screen blit - i.e. video playback without overlay */
/* note: source and destination may not overlap. */
// FIXME? checkout NV5 and NV10 version of cmd: faster?? (or is 0x77 a 'autoselect' version?)
void OFFSCREEN_TO_SCREEN_SCALED_FILTERED_BLIT_DMA( engine_token *et, offscreen_buffer_config *config, clipped_scaled_blit_params *list, uint32 count) { uint32 i = 0; uint32 cmd_depth; uint8 bpp; LOG(4, ("ACC_DMA: offscreen src buffer location $%p\n", (uint8*)(config->buffer))); /*** init acc engine for scaled filtered blit function ***/ /* Set pixel width */ switch (config->space) { case B_RGB15_LITTLE: cmd_depth = 0x00000002; bpp = 2; break; case B_RGB16_LITTLE: cmd_depth = 0x00000007; bpp = 2; break; case B_RGB32_LITTLE: case B_RGBA32_LITTLE: cmd_depth = 0x00000004; bpp = 4; break; /* fixme sometime: * we could do the spaces below if this function would be modified to be able * to use a source outside of the desktop, i.e. using offscreen bitmaps... */ case B_YCbCr422: cmd_depth = 0x00000005; bpp = 2; break; case B_YUV422: cmd_depth = 0x00000006; bpp = 2; break; default: /* note: this function does not support src or dest in the B_CMAP8 space! */ //fixme: the NV10 version of this cmd supports B_CMAP8 src though... (checkout) LOG(8,("ACC_DMA: scaled_filtered_blit, invalid bit depth\n")); return; } /* modify surface depth settings for 15-bit colorspace so command works as intended */ if (si->dm.space == B_RGB15_LITTLE) { /* wait for room in fifo for surface setup cmd if needed */ if (nv_acc_fifofree_dma(2) != B_OK) return; /* now setup 2D surface (writing 1 32bit word) */ nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 1); ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000002; /* Format */ } /* TNT1 has fixed operation mode 'SRCcopy' while the rest can be programmed: */ if (si->ps.card_type != NV04) { /* wait for room in fifo for cmds if needed. 
*/ if (nv_acc_fifofree_dma(5) != B_OK) return; /* now setup source bitmap colorspace */ nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SETCOLORFORMAT, 2); ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ /* now setup operation mode to SRCcopy */ ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000003; /* SetOperation */ } else { /* wait for room in fifo for cmd if needed. */ if (nv_acc_fifofree_dma(4) != B_OK) return; /* now setup source bitmap colorspace */ nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SETCOLORFORMAT, 1); ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ /* TNT1 has fixed operation mode SRCcopy */ } /* now setup fill color (writing 2 32bit words) */ nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000000; /* Color1A */ /*** do each blit ***/ while (count--) { uint32 j = 0; uint16 clipcnt = list[i].dest_clipcount; LOG(4,("ACC_DMA: offscreen src left %d, top %d\n", list[i].src_left, list[i].src_top)); LOG(4,("ACC_DMA: offscreen src width %d, height %d\n", list[i].src_width + 1, list[i].src_height + 1)); LOG(4,("ACC_DMA: offscreen dest left %d, top %d\n", list[i].dest_left, list[i].dest_top)); LOG(4,("ACC_DMA: offscreen dest width %d, height %d\n", list[i].dest_width + 1, list[i].dest_height + 1)); /* wait for room in fifo for blit cmd if needed. 
*/ if (nv_acc_fifofree_dma(9 + (5 * clipcnt)) != B_OK) return; /* now setup blit (writing 12 32bit words) */ nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCEORG + 8, 4); /* setup destination location and size for blit */ ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = ((list[i].dest_top << 16) | list[i].dest_left); /* DestTopLeftOutputRect */ ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = (((list[i].dest_height + 1) << 16) | (list[i].dest_width + 1)); /* DestHeightWidthOutputRect */ /* setup scaling */ //fixme: findout scaling limits... (although the current cmd interface doesn't support them.) ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = (((list[i].src_width + 1) << 20) / (list[i].dest_width + 1)); /* HorInvScale (in 12.20 format) */ ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = (((list[i].src_height + 1) << 20) / (list[i].dest_height + 1)); /* VerInvScale (in 12.20 format) */ nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCESIZE, 3); /* setup horizontal and vertical source (fetching) ends. * note: * horizontal granularity is 2 pixels, vertical granularity is 1 pixel. * look at Matrox or Neomagic bes engines code for usage example. */ //fixme: tested 15, 16 and 32-bit RGB depth, verify other depths... ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = (((list[i].src_height + 1) << 16) | (((list[i].src_width + 1) + 0x0001) & ~0x0001)); /* SourceHeightWidth */ /* setup source pitch (b0-15). Set 'format origin center' (b16-17) and * select 'format interpolator foh (bilinear filtering)' (b24). 
*/ ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = (config->bytes_per_row | (1 << 16) | (1 << 24)); /* SourcePitch */ /* setup source surface location */ ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = (uint32)((uint8*)config->buffer - (uint8*)si->framebuffer + (list[i].src_top * config->bytes_per_row) + (list[i].src_left * bpp)); /* Offset */ while (clipcnt--) { LOG(4,("ACC_DMA: offscreen clip left %d, top %d\n", list[i].dest_cliplist[j].left, list[i].dest_cliplist[j].top)); LOG(4,("ACC_DMA: offscreen clip width %d, height %d\n", list[i].dest_cliplist[j].width + 1, list[i].dest_cliplist[j].height + 1)); /* now setup blit (writing 12 32bit words) */ nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCEORG, 2); /* setup dest clipping rect for blit (b0-15 = left, b16-31 = top) */ ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = (list[i].dest_cliplist[j].top << 16) | list[i].dest_cliplist[j].left; /* DestTopLeftClipRect */ ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = ((list[i].dest_cliplist[j].height + 1) << 16) | (list[i].dest_cliplist[j].width + 1); /* DestHeightWidthClipRect */ nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCESIZE + 12, 1); /* setup source start: first (sub)pixel contributing to output picture */ /* note: * clipping is not asked for. * look at nVidia NV10+ bes engine code for useage example. */ ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0; /* SourceRef (b0-15 = hor, b16-31 = ver: both in 12.4 format) */ j++; } i++; } /* tell the engine to fetch the commands in the DMA buffer that where not * executed before. 
*/ nv_start_dma(); /* reset surface depth settings so the other engine commands works as intended */ if (si->dm.space == B_RGB15_LITTLE) { /* wait for room in fifo for surface setup cmd if needed */ if (nv_acc_fifofree_dma(2) != B_OK) return; /* now setup 2D surface (writing 1 32bit word) */ nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 1); ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000004; /* Format */ /* tell the engine to fetch the commands in the DMA buffer that where not * executed before. */ nv_start_dma(); } /* tell 3D add-ons that they should reload their rendering states and surfaces */ si->engine.threeD.reload = 0xffffffff; } /* rectangle fill - i.e. workspace and window background color */ void FILL_RECTANGLE_DMA(engine_token *et, uint32 colorIndex, fill_rect_params *list, uint32 count) { uint32 i = 0; uint16 subcnt; /*** init acc engine for fill function ***/ /* ROP registers (Raster OPeration): * wait for room in fifo for ROP and bitmap cmd if needed. */ if (nv_acc_fifofree_dma(4) != B_OK) return; /* now setup ROP (writing 2 32bit words) for GXcopy */ nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xcc; /* SetRop5 */ /* now setup fill color (writing 2 32bit words) */ nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = colorIndex; /* Color1A */ /*** draw each rectangle ***/ while (count) { /* break up the list in sublists to minimize calls, while making sure long * lists still get executed without trouble */ subcnt = 32; if (count < 32) subcnt = count; count -= subcnt; /* wait for room in fifo for bitmap cmd if needed. */ if (nv_acc_fifofree_dma(1 + (2 * subcnt)) != B_OK) return; /* issue fill command once... */ nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, (2 * subcnt)); /* ... 
and send multiple rects (engine cmd supports 32 max) */ while (subcnt--) { ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = (((list[i].left) << 16) | ((list[i].top) & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */ ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = (((((list[i].right)+1) - (list[i].left)) << 16) | (((list[i].bottom-list[i].top)+1) & 0x0000ffff)); /* Unclipped Rect 0 WidthHeight */ i++; } /* tell the engine to fetch the commands in the DMA buffer that where not * executed before. */ nv_start_dma(); } /* tell 3D add-ons that they should reload their rendering states and surfaces */ si->engine.threeD.reload = 0xffffffff; } /* span fill - i.e. (selected) menuitem background color (Dano) */ void FILL_SPAN_DMA(engine_token *et, uint32 colorIndex, uint16 *list, uint32 count) { uint32 i = 0; uint16 subcnt; /*** init acc engine for fill function ***/ /* ROP registers (Raster OPeration): * wait for room in fifo for ROP and bitmap cmd if needed. */ if (nv_acc_fifofree_dma(4) != B_OK) return; /* now setup ROP (writing 2 32bit words) for GXcopy */ nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xcc; /* SetRop5 */ /* now setup fill color (writing 2 32bit words) */ nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = colorIndex; /* Color1A */ /*** draw each span ***/ while (count) { /* break up the list in sublists to minimize calls, while making sure long * lists still get executed without trouble */ subcnt = 32; if (count < 32) subcnt = count; count -= subcnt; /* wait for room in fifo for bitmap cmd if needed. */ if (nv_acc_fifofree_dma(1 + (2 * subcnt)) != B_OK) return; /* issue fill command once... */ nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, (2 * subcnt)); /* ... 
and send multiple rects (spans) (engine cmd supports 32 max) */ while (subcnt--) { ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = (((list[i+1]) << 16) | ((list[i]) & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */ ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = ((((list[i+2]+1) - (list[i+1])) << 16) | 0x00000001); /* Unclipped Rect 0 WidthHeight */ i+=3; } /* tell the engine to fetch the commands in the DMA buffer that where not * executed before. */ nv_start_dma(); } /* tell 3D add-ons that they should reload their rendering states and surfaces */ si->engine.threeD.reload = 0xffffffff; } /* rectangle invert - i.e. text cursor and text selection */ void INVERT_RECTANGLE_DMA(engine_token *et, fill_rect_params *list, uint32 count) { uint32 i = 0; uint16 subcnt; /*** init acc engine for invert function ***/ /* ROP registers (Raster OPeration): * wait for room in fifo for ROP and bitmap cmd if needed. */ if (nv_acc_fifofree_dma(4) != B_OK) return; /* now setup ROP (writing 2 32bit words) for GXinvert */ nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x55; /* SetRop5 */ /* now reset fill color (writing 2 32bit words) */ nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000000; /* Color1A */ /*** invert each rectangle ***/ while (count) { /* break up the list in sublists to minimize calls, while making sure long * lists still get executed without trouble */ subcnt = 32; if (count < 32) subcnt = count; count -= subcnt; /* wait for room in fifo for bitmap cmd if needed. */ if (nv_acc_fifofree_dma(1 + (2 * subcnt)) != B_OK) return; /* issue fill command once... */ nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, (2 * subcnt)); /* ... 
and send multiple rects (engine cmd supports 32 max) */ while (subcnt--) { ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = (((list[i].left) << 16) | ((list[i].top) & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */ ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = (((((list[i].right)+1) - (list[i].left)) << 16) | (((list[i].bottom-list[i].top)+1) & 0x0000ffff)); /* Unclipped Rect 0 WidthHeight */ i++; } /* tell the engine to fetch the commands in the DMA buffer that where not * executed before. */ nv_start_dma(); } /* tell 3D add-ons that they should reload their rendering states and surfaces */ si->engine.threeD.reload = 0xffffffff; }