1 /* NV Acceleration functions */ 2 3 /* Author: 4 Rudolf Cornelissen 8/2003-6/2005. 5 6 This code was possible thanks to: 7 - the Linux XFree86 NV driver, 8 - the Linux UtahGLX 3D driver. 9 */ 10 11 #define MODULE_BIT 0x00080000 12 13 /* 3D command defines (needed for concurrent overlay/3D 'workaround') 14 * note: 15 * the workaround contains of two pieces: 16 * - we have to issue a 3D drawing command before overlay is activated to prevent 17 * the acceleration engine to crash; 18 * - we have to forego FIFO assignment switching: switching while we use overlay 19 * crashes the acceleration engine as well. 20 * 21 * Hopefully we can find the _real_ solution for this one day... */ 22 #define RIVA_STATE3D_05(t0, t1, t2, bb, cc) \ 23 { \ 24 nv_acc_cmd_dma(NV4_DX5_TEXTURE_TRIANGLE, NV4_DX5_TEXTURE_TRIANGLE_COLORKEY, 7); \ 25 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0; /* Colorkey */ \ 26 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = t0; /* Offset */ \ 27 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = t1; /* Format */ \ 28 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = t2; /* Filter */ \ 29 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = bb; /* Blend */ \ 30 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cc; /* Control */ \ 31 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0; /* FogColor */ \ 32 } 33 34 #define RIVA_VERTEX3D_05(ii, xx, yy) \ 35 { \ 36 nv_acc_cmd_dma(NV4_DX5_TEXTURE_TRIANGLE, NV4_DX5_TEXTURE_TRIANGLE_TLVERTEX(ii), 8); \ 37 ((float *)(si->dma_buffer))[si->engine.dma.current++] = xx; /* ScreenX */ \ 38 ((float *)(si->dma_buffer))[si->engine.dma.current++] = yy; /* ScreenY */ \ 39 ((float *)(si->dma_buffer))[si->engine.dma.current++] = 0.0f; /* ScreenZ */ \ 40 ((float *)(si->dma_buffer))[si->engine.dma.current++] = 1.0f; /* RWH */ \ 41 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0; /* Color */ \ 42 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0; /* Specular */ \ 43 ((float *)(si->dma_buffer))[si->engine.dma.current++] = 0.0f; /* TU */ \ 44 ((float *)(si->dma_buffer))[si->engine.dma.current++] = 0.0f; /* TV */ \ 45 } 46 47 #define RIVA_DRAWQUAD3D_05(v0, v1, v2, v3) \ 48 { \ 49 nv_acc_cmd_dma(NV4_DX5_TEXTURE_TRIANGLE, NV4_DX5_TEXTURE_TRIANGLE_TLVDRAWPRIM(0), 1); \ 50 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = \ 51 (((v3)<<20)|((v2)<<16)|((v0)<<12)|((v2)<<8)|((v1)<<4)|(v0)); /* TLVDrawPrim */ \ 52 } 53 54 55 #include "nv_std.h" 56 57 /*acceleration notes*/ 58 59 /*functions Be's app_server uses: 60 fill span (horizontal only) 61 fill rectangle (these 2 are very similar) 62 invert rectangle 63 blit 64 */ 65 66 static void nv_init_for_3D_dma(void); 67 static void nv_start_dma(void); 68 static status_t nv_acc_fifofree_dma(uint16 cmd_size); 69 static void nv_acc_cmd_dma(uint32 cmd, uint16 offset, uint16 size); 70 static void nv_acc_set_ch_dma(uint16 ch, uint32 handle); 71 72 /* used to track engine DMA stalls */ 73 static uint8 err; 74 75 /* wait until engine completely idle */ 76 status_t nv_acc_wait_idle_dma() 77 { 78 /* we'd better check for timeouts on the DMA engine as it's theoretically 79 * breakable by malfunctioning software */ 80 uint16 cnt = 0; 81 82 /* wait until all upcoming commands are in execution at least. Do this until 83 * we hit a timeout; abort if we failed at least three times before: 84 * if DMA stalls, we have to forget about it alltogether at some point, or 85 * the system will almost come to a complete halt.. */ 86 /* note: 87 * it doesn't matter which FIFO channel's DMA registers we access, they are in 88 * fact all the same set. It also doesn't matter if the channel was assigned a 89 * command or not. */ 90 while ((NV_REG32(NVACC_FIFO + NV_GENERAL_DMAGET) != (si->engine.dma.put << 2)) && 91 (cnt < 10000) && (err < 3)) 92 { 93 /* snooze a bit so I do not hammer the bus */ 94 snooze (100); 95 cnt++; 96 } 97 98 /* log timeout if we had one */ 99 if (cnt == 10000) 100 { 101 if (err < 3) err++; 102 LOG(4,("ACC_DMA: wait_idle; DMA timeout #%d, engine trouble!\n", err)); 103 } 104 105 /* wait until execution completed */ 106 while (ACCR(STATUS)) 107 { 108 /* snooze a bit so I do not hammer the bus */ 109 snooze (100); 110 } 111 112 return B_OK; 113 } 114 115 /* AFAIK this must be done for every new screenmode. 116 * Engine required init. */ 117 status_t nv_acc_init_dma() 118 { 119 uint32 cnt; 120 uint32 surf_depth, cmd_depth; 121 /* reset the engine DMA stalls counter */ 122 err = 0; 123 124 /* a hanging engine only recovers from a complete power-down/power-up cycle */ 125 NV_REG32(NV32_PWRUPCTRL) = 0x13110011; 126 snooze(1000); 127 NV_REG32(NV32_PWRUPCTRL) = 0x13111111; 128 129 /* don't try this on NV20 and later.. */ 130 if (si->ps.card_arch < NV20A) 131 { 132 /* actively reset the PGRAPH registerset (acceleration engine) */ 133 for (cnt = 0x00400000; cnt < 0x00402000; cnt +=4) 134 { 135 NV_REG32(cnt) = 0x00000000; 136 } 137 } 138 139 /* setup PTIMER: */ 140 //fixme? how about NV28 setup as just after coldstarting? (see nv_info.c) 141 /* set timer numerator to 8 (in b0-15) */ 142 ACCW(PT_NUMERATOR, 0x00000008); 143 /* set timer denominator to 3 (in b0-15) */ 144 ACCW(PT_DENOMINATR, 0x00000003); 145 146 /* disable timer-alarm INT requests (b0) */ 147 ACCW(PT_INTEN, 0x00000000); 148 /* reset timer-alarm INT status bit (b0) */ 149 ACCW(PT_INTSTAT, 0xffffffff); 150 151 /* enable PRAMIN write access on pre NV10 before programming it! */ 152 if (si->ps.card_arch == NV04A) 153 { 154 /* set framebuffer config: type = notiling, PRAMIN write access enabled */ 155 NV_REG32(NV32_PFB_CONFIG_0) = 0x00001114; 156 } 157 else 158 { 159 /* setup acc engine 'source' tile adressranges */ 160 ACCW(NV10_FBTIL0AD, 0); 161 ACCW(NV10_FBTIL1AD, 0); 162 ACCW(NV10_FBTIL2AD, 0); 163 ACCW(NV10_FBTIL3AD, 0); 164 ACCW(NV10_FBTIL4AD, 0); 165 ACCW(NV10_FBTIL5AD, 0); 166 ACCW(NV10_FBTIL6AD, 0); 167 ACCW(NV10_FBTIL7AD, 0); 168 ACCW(NV10_FBTIL0ED, (si->ps.memory_size - 1)); 169 ACCW(NV10_FBTIL1ED, (si->ps.memory_size - 1)); 170 ACCW(NV10_FBTIL2ED, (si->ps.memory_size - 1)); 171 ACCW(NV10_FBTIL3ED, (si->ps.memory_size - 1)); 172 ACCW(NV10_FBTIL4ED, (si->ps.memory_size - 1)); 173 ACCW(NV10_FBTIL5ED, (si->ps.memory_size - 1)); 174 ACCW(NV10_FBTIL6ED, (si->ps.memory_size - 1)); 175 ACCW(NV10_FBTIL7ED, (si->ps.memory_size - 1)); 176 } 177 178 /*** PRAMIN ***/ 179 /* first clear the entire RAMHT (hash-table) space to a defined state. It turns 180 * out at least NV11 will keep the previously programmed handles over resets and 181 * power-outages upto about 15 seconds!! Faulty entries might well hang the 182 * engine (confirmed on NV11). 183 * Note: 184 * this behaviour is not very strange: even very old DRAM chips are known to be 185 * able to do this, even though you should refresh them every few milliseconds or 186 * so. (Large memory cell capacitors, though different cells vary a lot in their 187 * capacity.) 188 * Of course data validity is not certain by a long shot over this large 189 * amount of time.. */ 190 for(cnt = 0; cnt < 0x0400; cnt++) 191 NV_REG32(NVACC_HT_HANDL_00 + (cnt << 2)) = 0; 192 /* RAMHT (hash-table) space SETUP FIFO HANDLES */ 193 /* note: 194 * 'instance' tells you where the engine command is stored in 'PR_CTXx_x' sets 195 * below: instance being b4-19 with baseadress NV_PRAMIN_CTX_0 (0x00700000). 196 * That command is linked to the handle noted here. This handle is then used to 197 * tell the FIFO to which engine command it is connected! 198 * (CTX registers are actually a sort of RAM space.) */ 199 if (si->ps.card_arch >= NV40A) 200 { 201 /* (first set) */ 202 ACCW(HT_HANDL_00, (0x80000000 | NV10_CONTEXT_SURFACES_2D)); /* 32bit handle (not used) */ 203 ACCW(HT_VALUE_00, 0x0010114c); /* instance $114c, engine = acc engine, CHID = $00 */ 204 205 ACCW(HT_HANDL_01, (0x80000000 | NV_IMAGE_BLIT)); /* 32bit handle */ 206 ACCW(HT_VALUE_01, 0x00101148); /* instance $1148, engine = acc engine, CHID = $00 */ 207 208 ACCW(HT_HANDL_02, (0x80000000 | NV4_GDI_RECTANGLE_TEXT)); /* 32bit handle */ 209 ACCW(HT_VALUE_02, 0x0010114a); /* instance $114a, engine = acc engine, CHID = $00 */ 210 211 /* (second set) */ 212 ACCW(HT_HANDL_10, (0x80000000 | NV_ROP5_SOLID)); /* 32bit handle */ 213 ACCW(HT_VALUE_10, 0x00101142); /* instance $1142, engine = acc engine, CHID = $00 */ 214 215 ACCW(HT_HANDL_11, (0x80000000 | NV_IMAGE_BLACK_RECTANGLE)); /* 32bit handle */ 216 ACCW(HT_VALUE_11, 0x00101144); /* instance $1144, engine = acc engine, CHID = $00 */ 217 218 ACCW(HT_HANDL_12, (0x80000000 | NV_IMAGE_PATTERN)); /* 32bit handle */ 219 ACCW(HT_VALUE_12, 0x00101146); /* instance $1146, engine = acc engine, CHID = $00 */ 220 } 221 else 222 { 223 /* (first set) */ 224 ACCW(HT_HANDL_00, (0x80000000 | NV4_SURFACE)); /* 32bit handle */ 225 ACCW(HT_VALUE_00, 0x80011145); /* instance $1145, engine = acc engine, CHID = $00 */ 226 227 ACCW(HT_HANDL_01, (0x80000000 | NV_IMAGE_BLIT)); /* 32bit handle */ 228 ACCW(HT_VALUE_01, 0x80011146); /* instance $1146, engine = acc engine, CHID = $00 */ 229 230 ACCW(HT_HANDL_02, (0x80000000 | NV4_GDI_RECTANGLE_TEXT)); /* 32bit handle */ 231 ACCW(HT_VALUE_02, 0x80011147); /* instance $1147, engine = acc engine, CHID = $00 */ 232 233 ACCW(HT_HANDL_03, (0x80000000 | NV4_CONTEXT_SURFACES_ARGB_ZS)); /* 32bit handle (3D) */ 234 ACCW(HT_VALUE_03, 0x80011148); /* instance $1148, engine = acc engine, CHID = $00 */ 235 236 /* NV4_ and NV10_DX5_TEXTURE_TRIANGLE should be identical */ 237 ACCW(HT_HANDL_04, (0x80000000 | NV4_DX5_TEXTURE_TRIANGLE)); /* 32bit handle (3D) */ 238 ACCW(HT_VALUE_04, 0x80011149); /* instance $1149, engine = acc engine, CHID = $00 */ 239 240 /* NV4_ and NV10_DX6_MULTI_TEXTURE_TRIANGLE should be identical */ 241 ACCW(HT_HANDL_05, (0x80000000 | NV4_DX6_MULTI_TEXTURE_TRIANGLE)); /* 32bit handle (not used) */ 242 ACCW(HT_VALUE_05, 0x8001114a); /* instance $114a, engine = acc engine, CHID = $00 */ 243 244 ACCW(HT_HANDL_06, (0x80000000 | NV1_RENDER_SOLID_LIN)); /* 32bit handle (not used) */ 245 ACCW(HT_VALUE_06, 0x8001114b); /* instance $114b, engine = acc engine, CHID = $00 */ 246 247 /* (second set) */ 248 ACCW(HT_HANDL_10, (0x80000000 | NV_ROP5_SOLID)); /* 32bit handle */ 249 ACCW(HT_VALUE_10, 0x80011142); /* instance $1142, engine = acc engine, CHID = $00 */ 250 251 ACCW(HT_HANDL_11, (0x80000000 | NV_IMAGE_BLACK_RECTANGLE)); /* 32bit handle */ 252 ACCW(HT_VALUE_11, 0x80011143); /* instance $1143, engine = acc engine, CHID = $00 */ 253 254 ACCW(HT_HANDL_12, (0x80000000 | NV_IMAGE_PATTERN)); /* 32bit handle */ 255 ACCW(HT_VALUE_12, 0x80011144); /* instance $1144, engine = acc engine, CHID = $00 */ 256 } 257 258 /* program CTX registers: CTX1 is mostly done later (colorspace dependant) */ 259 /* note: 260 * CTX determines which HT handles point to what engine commands. */ 261 /* note also: 262 * CTX registers are in fact in the same GPU internal RAM space as the engine's 263 * hashtable. This means that stuff programmed in here also survives resets and 264 * power-outages! (confirmed NV11) */ 265 if (si->ps.card_arch >= NV40A) 266 { 267 /* setup a DMA define for use by command defines below. */ 268 ACCW(PR_CTX0_R, 0x00003000); /* DMA page table present and of linear type; 269 * DMA target node is NVM (non-volatile memory?) 270 * (instead of doing PCI or AGP transfers) */ 271 ACCW(PR_CTX1_R, (si->ps.memory_size - 1)); /* DMA limit: size is all cardRAM */ 272 ACCW(PR_CTX2_R, ((0x00000000 & 0xfffff000) | 0x00000002)); 273 /* DMA access type is READ_AND_WRITE; 274 * memory starts at start of cardRAM (b12-31): 275 * It's adress needs to be at a 4kb boundary! */ 276 ACCW(PR_CTX3_R, 0x00000002); /* unknown (looks like this is rubbish/not needed?) */ 277 /* setup set '0' for cmd NV_ROP5_SOLID */ 278 ACCW(PR_CTX0_0, 0x02080043); /* NVclass $043, patchcfg ROP_AND, nv10+: little endian */ 279 ACCW(PR_CTX1_0, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 280 ACCW(PR_CTX2_0, 0x00000000); /* DMA0 and DMA1 instance invalid */ 281 ACCW(PR_CTX3_0, 0x00000000); /* method traps disabled */ 282 ACCW(PR_CTX0_1, 0x00000000); /* extra */ 283 ACCW(PR_CTX1_1, 0x00000000); /* extra */ 284 /* setup set '1' for cmd NV_IMAGE_BLACK_RECTANGLE */ 285 ACCW(PR_CTX0_2, 0x02080019); /* NVclass $019, patchcfg ROP_AND, nv10+: little endian */ 286 ACCW(PR_CTX1_2, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 287 ACCW(PR_CTX2_2, 0x00000000); /* DMA0 and DMA1 instance invalid */ 288 ACCW(PR_CTX3_2, 0x00000000); /* method traps disabled */ 289 ACCW(PR_CTX0_3, 0x00000000); /* extra */ 290 ACCW(PR_CTX1_3, 0x00000000); /* extra */ 291 /* setup set '2' for cmd NV_IMAGE_PATTERN */ 292 ACCW(PR_CTX0_4, 0x02080018); /* NVclass $018, patchcfg ROP_AND, nv10+: little endian */ 293 ACCW(PR_CTX1_4, 0x02000000); /* colorspace not set, notify instance is $0200 (b16-31) */ 294 ACCW(PR_CTX2_4, 0x00000000); /* DMA0 and DMA1 instance invalid */ 295 ACCW(PR_CTX3_4, 0x00000000); /* method traps disabled */ 296 ACCW(PR_CTX0_5, 0x00000000); /* extra */ 297 ACCW(PR_CTX1_5, 0x00000000); /* extra */ 298 /* setup set '4' for cmd NV_IMAGE_BLIT */ 299 ACCW(PR_CTX0_6, 0x0208005f); /* NVclass $05f, patchcfg ROP_AND, nv10+: little endian */ 300 ACCW(PR_CTX1_6, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 301 ACCW(PR_CTX2_6, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */ 302 ACCW(PR_CTX3_6, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */ 303 ACCW(PR_CTX0_7, 0x00000000); /* extra */ 304 ACCW(PR_CTX1_7, 0x00000000); /* extra */ 305 /* setup set '5' for cmd NV4_GDI_RECTANGLE_TEXT */ 306 ACCW(PR_CTX0_8, 0x0208004a); /* NVclass $04a, patchcfg ROP_AND, nv10+: little endian */ 307 ACCW(PR_CTX1_8, 0x02000000); /* colorspace not set, notify instance is $0200 (b16-31) */ 308 ACCW(PR_CTX2_8, 0x00000000); /* DMA0 and DMA1 instance invalid */ 309 ACCW(PR_CTX3_8, 0x00000000); /* method traps disabled */ 310 ACCW(PR_CTX0_9, 0x00000000); /* extra */ 311 ACCW(PR_CTX1_9, 0x00000000); /* extra */ 312 /* setup set '6' for cmd NV10_CONTEXT_SURFACES_2D */ 313 ACCW(PR_CTX0_A, 0x02080062); /* NVclass $062, nv10+: little endian */ 314 ACCW(PR_CTX1_A, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 315 ACCW(PR_CTX2_A, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */ 316 ACCW(PR_CTX3_A, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */ 317 ACCW(PR_CTX0_B, 0x00000000); /* extra */ 318 ACCW(PR_CTX1_B, 0x00000000); /* extra */ 319 /* setup DMA set pointed at by PF_CACH1_DMAI */ 320 ACCW(PR_CTX0_C, 0x00003002); /* DMA page table present and of linear type; 321 * DMA class is $002 (b0-11); 322 * DMA target node is NVM (non-volatile memory?) 323 * (instead of doing PCI or AGP transfers) */ 324 ACCW(PR_CTX1_C, 0x00007fff); /* DMA limit: tablesize is 32k bytes */ 325 ACCW(PR_CTX2_C, (((si->ps.memory_size - 1) & 0xffff8000) | 0x00000002)); 326 /* DMA access type is READ_AND_WRITE; 327 * table is located at end of cardRAM (b12-31): 328 * It's adress needs to be at a 4kb boundary! */ 329 } 330 else 331 { 332 /* setup a DMA define for use by command defines below. */ 333 ACCW(PR_CTX0_R, 0x00003000); /* DMA page table present and of linear type; 334 * DMA target node is NVM (non-volatile memory?) 335 * (instead of doing PCI or AGP transfers) */ 336 ACCW(PR_CTX1_R, (si->ps.memory_size - 1)); /* DMA limit: size is all cardRAM */ 337 ACCW(PR_CTX2_R, ((0x00000000 & 0xfffff000) | 0x00000002)); 338 /* DMA access type is READ_AND_WRITE; 339 * memory starts at start of cardRAM (b12-31): 340 * It's adress needs to be at a 4kb boundary! */ 341 ACCW(PR_CTX3_R, 0x00000002); /* unknown (looks like this is rubbish/not needed?) */ 342 /* setup set '0' for cmd NV_ROP5_SOLID */ 343 ACCW(PR_CTX0_0, 0x01008043); /* NVclass $043, patchcfg ROP_AND, nv10+: little endian */ 344 ACCW(PR_CTX1_0, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 345 ACCW(PR_CTX2_0, 0x00000000); /* DMA0 and DMA1 instance invalid */ 346 ACCW(PR_CTX3_0, 0x00000000); /* method traps disabled */ 347 /* setup set '1' for cmd NV_IMAGE_BLACK_RECTANGLE */ 348 ACCW(PR_CTX0_1, 0x01008019); /* NVclass $019, patchcfg ROP_AND, nv10+: little endian */ 349 ACCW(PR_CTX1_1, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 350 ACCW(PR_CTX2_1, 0x00000000); /* DMA0 and DMA1 instance invalid */ 351 ACCW(PR_CTX3_1, 0x00000000); /* method traps disabled */ 352 /* setup set '2' for cmd NV_IMAGE_PATTERN */ 353 ACCW(PR_CTX0_2, 0x01008018); /* NVclass $018, patchcfg ROP_AND, nv10+: little endian */ 354 ACCW(PR_CTX1_2, 0x00000002); /* colorspace not set, notify instance is $0200 (b16-31) */ 355 ACCW(PR_CTX2_2, 0x00000000); /* DMA0 and DMA1 instance invalid */ 356 ACCW(PR_CTX3_2, 0x00000000); /* method traps disabled */ 357 /* setup set '3' for ... */ 358 if(si->ps.card_arch >= NV10A) 359 { 360 /* ... cmd NV10_CONTEXT_SURFACES_2D */ 361 ACCW(PR_CTX0_3, 0x01008062); /* NVclass $062, nv10+: little endian */ 362 } 363 else 364 { 365 /* ... cmd NV4_SURFACE */ 366 ACCW(PR_CTX0_3, 0x01008042); /* NVclass $042, nv10+: little endian */ 367 } 368 ACCW(PR_CTX1_3, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 369 ACCW(PR_CTX2_3, 0x11401140); /* DMA0 instance is $1140, DMA1 instance invalid */ 370 ACCW(PR_CTX3_3, 0x00000000); /* method trap 0 is $1140, trap 1 disabled */ 371 /* setup set '4' for cmd NV_IMAGE_BLIT */ 372 ACCW(PR_CTX0_4, 0x0100805f); /* NVclass $05f, patchcfg ROP_AND, nv10+: little endian */ 373 ACCW(PR_CTX1_4, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 374 ACCW(PR_CTX2_4, 0x11401140); /* DMA0 instance is $1140, DMA1 instance invalid */ 375 ACCW(PR_CTX3_4, 0x00000000); /* method trap 0 is $1140, trap 1 disabled */ 376 /* setup set '5' for cmd NV4_GDI_RECTANGLE_TEXT */ 377 ACCW(PR_CTX0_5, 0x0100804a); /* NVclass $04a, patchcfg ROP_AND, nv10+: little endian */ 378 ACCW(PR_CTX1_5, 0x00000002); /* colorspace not set, notify instance is $0200 (b16-31) */ 379 ACCW(PR_CTX2_5, 0x00000000); /* DMA0 and DMA1 instance invalid */ 380 ACCW(PR_CTX3_5, 0x00000000); /* method traps disabled */ 381 /* setup set '6' ... */ 382 if (si->ps.card_arch >= NV10A) 383 { 384 /* ... for cmd NV10_CONTEXT_SURFACES_ARGB_ZS */ 385 ACCW(PR_CTX0_6, 0x00000093); /* NVclass $093, nv10+: little endian */ 386 } 387 else 388 { 389 /* ... for cmd NV4_CONTEXT_SURFACES_ARGB_ZS */ 390 ACCW(PR_CTX0_6, 0x00000053); /* NVclass $053, nv10+: little endian */ 391 } 392 ACCW(PR_CTX1_6, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 393 ACCW(PR_CTX2_6, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 394 ACCW(PR_CTX3_6, 0x00000000); /* method traps disabled */ 395 /* setup set '7' ... */ 396 if (si->ps.card_arch >= NV10A) 397 { 398 /* ... for cmd NV10_DX5_TEXTURE_TRIANGLE */ 399 ACCW(PR_CTX0_7, 0x0300a094); /* NVclass $094, patchcfg ROP_AND, userclip enable, 400 * context surface0 valid, nv10+: little endian */ 401 } 402 else 403 { 404 /* ... for cmd NV4_DX5_TEXTURE_TRIANGLE */ 405 ACCW(PR_CTX0_7, 0x0300a054); /* NVclass $054, patchcfg ROP_AND, userclip enable, 406 * context surface0 valid */ 407 } 408 ACCW(PR_CTX1_7, 0x00000d01); /* format is A8RGB24, MSB mono */ 409 ACCW(PR_CTX2_7, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 410 ACCW(PR_CTX3_7, 0x00000000); /* method traps disabled */ 411 /* setup set '8' ... */ 412 if (si->ps.card_arch >= NV10A) 413 { 414 /* ... for cmd NV10_DX6_MULTI_TEXTURE_TRIANGLE (not used) */ 415 ACCW(PR_CTX0_8, 0x0300a095); /* NVclass $095, patchcfg ROP_AND, userclip enable, 416 * context surface0 valid, nv10+: little endian */ 417 } 418 else 419 { 420 /* ... for cmd NV4_DX6_MULTI_TEXTURE_TRIANGLE (not used) */ 421 ACCW(PR_CTX0_8, 0x0300a055); /* NVclass $055, patchcfg ROP_AND, userclip enable, 422 * context surface0 valid */ 423 } 424 ACCW(PR_CTX1_8, 0x00000d01); /* format is A8RGB24, MSB mono */ 425 ACCW(PR_CTX2_8, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 426 ACCW(PR_CTX3_8, 0x00000000); /* method traps disabled */ 427 /* setup set '9' for cmd NV1_RENDER_SOLID_LIN (not used) */ 428 ACCW(PR_CTX0_9, 0x0300a01c); /* NVclass $01c, patchcfg ROP_AND, userclip enable, 429 * context surface0 valid, nv10+: little endian */ 430 ACCW(PR_CTX1_9, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 431 ACCW(PR_CTX2_9, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 432 ACCW(PR_CTX3_9, 0x00000000); /* method traps disabled */ 433 /* setup DMA set pointed at by PF_CACH1_DMAI */ 434 if (si->engine.agp_mode) 435 { 436 /* DMA page table present and of linear type; 437 * DMA class is $002 (b0-11); 438 * DMA target node is AGP */ 439 ACCW(PR_CTX0_A, 0x00033002); 440 } 441 else 442 { 443 /* DMA page table present and of linear type; 444 * DMA class is $002 (b0-11); 445 * DMA target node is PCI */ 446 ACCW(PR_CTX0_A, 0x00023002); 447 } 448 ACCW(PR_CTX1_A, 0x000fffff); /* DMA limit: tablesize is 1M bytes */ 449 ACCW(PR_CTX2_A, (((uint32)((uint8 *)(si->dma_buffer_pci))) | 0x00000002)); 450 /* DMA access type is READ_AND_WRITE; 451 * table is located in main system RAM (b12-31): 452 * It's adress needs to be at a 4kb boundary! */ 453 454 //3D stuff: 455 /* 456 rud's (temp.) notes: 457 (problem: 3D driver renders in 32bit whatever the frontbuffer space in DMA mode.) 458 - the colorspace dependant info under 'acc engine' also sets the outcome for the 459 3D add-on. I don't know yet if the 3D render funcs render in the frontbuffer 460 space and the back-to-front blit isn't set (stays in 32bit!) (likely), 461 or if the 3D funcs render always in 32bit space and back-to-front blit color- 462 space converts... I'll try to nail this down at some point. 463 - the colorspace dependant info under 'pramin' is needed to get the 3D related 464 surface commands up and running. An alternate solution would probably be calling 465 the surface command with the colorspace set. 466 */ 467 switch(si->dm.space) 468 { 469 case B_CMAP8: 470 /* acc engine */ 471 ACCW(FORMATS, 0x00001010); 472 if (si->ps.card_arch < NV30A) 473 /* set depth 0-5: $1 = Y8 */ 474 ACCW(BPIXEL, 0x00111111); 475 else 476 /* set depth 0-1: $1 = Y8, $2 = X1R5G5B5_Z1R5G5B5 */ 477 ACCW(BPIXEL, 0x00000021); 478 ACCW(STRD_FMT, 0x03020202); 479 /* PRAMIN */ 480 if (si->ps.card_arch == NV04A) 481 ACCW(PR_CTX1_6, 0x00000302); /* format is X24Y8, LSB mono */ 482 else 483 ACCW(PR_CTX1_6, 0x00000000); /* format is invalid */ 484 ACCW(PR_CTX1_9, 0x00000302); /* format is X24Y8, LSB mono */ 485 break; 486 case B_RGB15_LITTLE: 487 /* acc engine */ 488 ACCW(FORMATS, 0x00002071); 489 if (si->ps.card_arch < NV30A) 490 /* set depth 0-5: $2 = X1R5G5B5_Z1R5G5B5, $6 = Y16 */ 491 ACCW(BPIXEL, 0x00226222); 492 else 493 /* set depth 0-1: $2 = X1R5G5B5_Z1R5G5B5, $4 = A1R5G5B5 */ 494 ACCW(BPIXEL, 0x00000042); 495 ACCW(STRD_FMT, 0x09080808); 496 /* PRAMIN */ 497 ACCW(PR_CTX1_6, 0x00000902); /* format is X17RGB15, LSB mono */ 498 ACCW(PR_CTX1_9, 0x00000902); /* format is X17RGB15, LSB mono */ 499 break; 500 case B_RGB16_LITTLE: 501 /* acc engine */ 502 ACCW(FORMATS, 0x000050C2); 503 if (si->ps.card_arch < NV30A) 504 /* set depth 0-5: $5 = R5G6B5, $6 = Y16 */ 505 ACCW(BPIXEL, 0x00556555); 506 else 507 /* set depth 0-1: $5 = R5G6B5, $a = X1A7R8G8B8_O1A7R8G8B8 */ 508 ACCW(BPIXEL, 0x000000a5); 509 if (si->ps.card_arch == NV04A) 510 ACCW(STRD_FMT, 0x0c0b0b0b); 511 else 512 ACCW(STRD_FMT, 0x000b0b0c); 513 /* PRAMIN */ 514 ACCW(PR_CTX1_6, 0x00000c02); /* format is X16RGB16, LSB mono */ 515 ACCW(PR_CTX1_9, 0x00000c02); /* format is X16RGB16, LSB mono */ 516 break; 517 case B_RGB32_LITTLE: 518 case B_RGBA32_LITTLE: 519 /* acc engine */ 520 ACCW(FORMATS, 0x000070e5); 521 if (si->ps.card_arch < NV30A) 522 /* set depth 0-5: $7 = X8R8G8B8_Z8R8G8B8, $d = Y32 */ 523 ACCW(BPIXEL, 0x0077d777); 524 else 525 /* set depth 0-1: $7 = X8R8G8B8_Z8R8G8B8, $e = V8YB8U8YA8 */ 526 ACCW(BPIXEL, 0x000000e7); 527 ACCW(STRD_FMT, 0x0e0d0d0d); 528 /* PRAMIN */ 529 ACCW(PR_CTX1_6, 0x00000e02); /* format is X8RGB24, LSB mono */ 530 ACCW(PR_CTX1_9, 0x00000e02); /* format is X8RGB24, LSB mono */ 531 break; 532 default: 533 LOG(8,("ACC: init, invalid bit depth\n")); 534 return B_ERROR; 535 } 536 //end 3D stuff. 537 } 538 539 if (si->ps.card_arch == NV04A) 540 { 541 /* do a explicit engine reset */ 542 ACCW(DEBUG0, 0x000001ff); 543 544 /* init some function blocks */ 545 ACCW(DEBUG0, 0x1230c000); 546 ACCW(DEBUG1, 0x72111101); 547 ACCW(DEBUG2, 0x11d5f071); 548 ACCW(DEBUG3, 0x0004ff31); 549 /* init OP methods */ 550 ACCW(DEBUG3, 0x4004ff31); 551 552 /* disable all acceleration engine INT reguests */ 553 ACCW(ACC_INTE, 0x00000000); 554 /* reset all acceration engine INT status bits */ 555 ACCW(ACC_INTS, 0xffffffff); 556 /* context control enabled */ 557 ACCW(NV04_CTX_CTRL, 0x10010100); 558 /* all acceleration buffers, pitches and colors are valid */ 559 ACCW(NV04_ACC_STAT, 0xffffffff); 560 /* enable acceleration engine command FIFO */ 561 ACCW(FIFO_EN, 0x00000001); 562 563 /* setup location of active screen in framebuffer */ 564 ACCW(OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 565 ACCW(OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 566 /* setup accesible card memory range */ 567 ACCW(BLIMIT0, (si->ps.memory_size - 1)); 568 ACCW(BLIMIT1, (si->ps.memory_size - 1)); 569 570 /* pattern shape value = 8x8, 2 color */ 571 //fixme: not needed, unless the engine has a hardware fault (setting via cmd)! 572 //ACCW(PAT_SHP, 0x00000000); 573 /* Pgraph Beta AND value (fraction) b23-30 */ 574 ACCW(BETA_AND_VAL, 0xffffffff); 575 } 576 else 577 { 578 /* do a explicit engine reset */ 579 ACCW(DEBUG0, 0xffffffff); 580 ACCW(DEBUG0, 0x00000000); 581 /* disable all acceleration engine INT reguests */ 582 ACCW(ACC_INTE, 0x00000000); 583 /* reset all acceration engine INT status bits */ 584 ACCW(ACC_INTS, 0xffffffff); 585 /* context control enabled */ 586 ACCW(NV10_CTX_CTRL, 0x10010100); 587 /* all acceleration buffers, pitches and colors are valid */ 588 ACCW(NV10_ACC_STAT, 0xffffffff); 589 /* enable acceleration engine command FIFO */ 590 ACCW(FIFO_EN, 0x00000001); 591 /* setup surface type: 592 * b1-0 = %01 = surface type is non-swizzle; 593 * this is needed to enable 3D on NV1x (confirmed) and maybe others? */ 594 ACCW(NV10_SURF_TYP, ((ACCR(NV10_SURF_TYP)) & 0x0007ff00)); 595 ACCW(NV10_SURF_TYP, ((ACCR(NV10_SURF_TYP)) | 0x00020101)); 596 } 597 598 if (si->ps.card_arch == NV10A) 599 { 600 /* init some function blocks */ 601 ACCW(DEBUG1, 0x00118700); 602 /* DEBUG2 has a big influence on 3D speed for NV15 (confirmed) */ 603 ACCW(DEBUG2, 0x24f82ad9); 604 ACCW(DEBUG3, 0x55de0030); 605 606 /* copy tile setup stuff from 'source' to acc engine */ 607 for (cnt = 0; cnt < 32; cnt++) 608 { 609 NV_REG32(NVACC_NV10_TIL0AD + (cnt << 2)) = 610 NV_REG32(NVACC_NV10_FBTIL0AD + (cnt << 2)); 611 } 612 613 /* setup location of active screen in framebuffer */ 614 ACCW(OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 615 ACCW(OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 616 /* setup accesible card memory range */ 617 ACCW(BLIMIT0, (si->ps.memory_size - 1)); 618 ACCW(BLIMIT1, (si->ps.memory_size - 1)); 619 620 /* pattern shape value = 8x8, 2 color */ 621 //fixme: not needed, unless the engine has a hardware fault (setting via cmd)! 622 //ACCW(PAT_SHP, 0x00000000); 623 /* Pgraph Beta AND value (fraction) b23-30 */ 624 ACCW(BETA_AND_VAL, 0xffffffff); 625 } 626 627 if (si->ps.card_arch >= NV20A) 628 { 629 switch (si->ps.card_arch) 630 { 631 case NV40A: 632 /* init some function blocks */ 633 ACCW(DEBUG1, 0x401287c0); 634 ACCW(DEBUG3, 0x60de8051); 635 /* disable specific functions, but enable SETUP_SPARE2 register */ 636 ACCW(NV10_DEBUG4, 0x00008000); 637 /* set limit_viol_pix_adress(?): more likely something unknown.. */ 638 ACCW(NV25_WHAT0, 0x00be3c5f); 639 640 /* unknown.. */ 641 switch (si->ps.card_type) 642 { 643 case NV40: 644 case NV45: 645 ACCW(NV40_WHAT0, 0x83280fff); 646 ACCW(NV40_WHAT1, 0x000000a0); 647 ACCW(NV40_WHAT2, 0x0078e366); 648 ACCW(NV40_WHAT3, 0x0000014c); 649 break; 650 case NV41: 651 ACCW(NV40P_WHAT0, 0x83280eff); 652 ACCW(NV40P_WHAT1, 0x000000a0); 653 ACCW(NV40P_WHAT2, 0x007596ff); 654 ACCW(NV40P_WHAT3, 0x00000108); 655 break; 656 case NV43: 657 ACCW(NV40P_WHAT0, 0x83280eff); 658 ACCW(NV40P_WHAT1, 0x000000a0); 659 ACCW(NV40P_WHAT2, 0x0072cb77); 660 ACCW(NV40P_WHAT3, 0x00000108); 661 break; 662 case NV44: 663 ACCW(NV40P_WHAT0, 0x83280eff); 664 ACCW(NV40P_WHAT1, 0x000000a0); 665 666 NV_REG32(NV32_NV44_WHAT10) = NV_REG32(NV32_NV10STRAPINFO); 667 NV_REG32(NV32_NV44_WHAT11) = 0x00000000; 668 NV_REG32(NV32_NV44_WHAT12) = 0x00000000; 669 NV_REG32(NV32_NV44_WHAT13) = NV_REG32(NV32_NV10STRAPINFO); 670 671 ACCW(NV44_WHAT2, 0x00000000); 672 ACCW(NV44_WHAT3, 0x00000000); 673 break; 674 default: 675 ACCW(NV40P_WHAT0, 0x83280eff); 676 ACCW(NV40P_WHAT1, 0x000000a0); 677 break; 678 } 679 680 ACCW(NV10_TIL3PT, 0x2ffff800); 681 ACCW(NV10_TIL3ST, 0x00006000); 682 ACCW(NV4X_WHAT1, 0x01000000); 683 /* engine data source DMA instance = $1140 */ 684 ACCW(NV4X_DMA_SRC, 0x00001140); 685 break; 686 case NV30A: 687 /* init some function blocks, but most is unknown.. */ 688 ACCW(DEBUG1, 0x40108700); 689 ACCW(NV25_WHAT1, 0x00140000); 690 ACCW(DEBUG3, 0xf00e0431); 691 ACCW(NV10_DEBUG4, 0x00008000); 692 ACCW(NV25_WHAT0, 0xf04b1f36); 693 ACCW(NV20_WHAT3, 0x1002d888); 694 ACCW(NV25_WHAT2, 0x62ff007f); 695 break; 696 case NV20A: 697 /* init some function blocks, but most is unknown.. */ 698 ACCW(DEBUG1, 0x00118700); 699 ACCW(DEBUG3, 0xf20e0431); 700 ACCW(NV10_DEBUG4, 0x00000000); 701 ACCW(NV20_WHAT1, 0x00000040); 702 if (si->ps.card_type < NV25) 703 { 704 ACCW(NV20_WHAT2, 0x00080000); 705 ACCW(NV10_DEBUG5, 0x00000005); 706 ACCW(NV20_WHAT3, 0x45caa208); 707 ACCW(NV20_WHAT4, 0x24000000); 708 ACCW(NV20_WHAT5, 0x00000040); 709 710 /* copy some fixed RAM(?) configuration info(?) to some indexed registers: */ 711 /* b16-24 is select; b2-13 is adress in 32-bit words */ 712 ACCW(RDI_INDEX, 0x00e00038); 713 /* data is 32-bit */ 714 ACCW(RDI_DATA, 0x00000030); 715 /* copy some fixed RAM(?) configuration info(?) to some indexed registers: */ 716 /* b16-24 is select; b2-13 is adress in 32-bit words */ 717 ACCW(RDI_INDEX, 0x00e10038); 718 /* data is 32-bit */ 719 ACCW(RDI_DATA, 0x00000030); 720 } 721 else 722 { 723 ACCW(NV25_WHAT1, 0x00080000); 724 ACCW(NV25_WHAT0, 0x304b1fb6); 725 ACCW(NV20_WHAT3, 0x18b82880); 726 ACCW(NV20_WHAT4, 0x44000000); 727 ACCW(NV20_WHAT5, 0x40000080); 728 ACCW(NV25_WHAT2, 0x000000ff); 729 } 730 break; 731 } 732 733 /* NV20A, NV30A and NV40A: */ 734 /* copy tile setup stuff from 'source' to acc engine (pattern colorRAM?) */ 735 for (cnt = 0; cnt < 32; cnt++) 736 { 737 NV_REG32(NVACC_NV20_WHAT0 + (cnt << 2)) = 738 NV_REG32(NVACC_NV10_FBTIL0AD + (cnt << 2)); 739 } 740 741 if (si->ps.card_arch >= NV40A) 742 { 743 if ((si->ps.card_type == NV40) || (si->ps.card_type == NV45)) 744 { 745 /* copy some RAM configuration info(?) */ 746 ACCW(NV20_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); 747 ACCW(NV20_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); 748 ACCW(NV40_WHAT_T2, NV_REG32(NV32_PFB_CONFIG_0)); 749 ACCW(NV40_WHAT_T3, NV_REG32(NV32_PFB_CONFIG_1)); 750 751 /* setup location of active screen in framebuffer */ 752 ACCW(NV20_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 753 ACCW(NV20_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 754 /* setup accesible card memory range */ 755 ACCW(NV20_BLIMIT6, (si->ps.memory_size - 1)); 756 ACCW(NV20_BLIMIT7, (si->ps.memory_size - 1)); 757 } 758 else 759 { 760 /* copy some RAM configuration info(?) */ 761 ACCW(NV40P_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); 762 ACCW(NV40P_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); 763 ACCW(NV40P_WHAT_T2, NV_REG32(NV32_PFB_CONFIG_0)); 764 ACCW(NV40P_WHAT_T3, NV_REG32(NV32_PFB_CONFIG_1)); 765 766 /* setup location of active screen in framebuffer */ 767 ACCW(NV40P_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 768 ACCW(NV40P_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 769 /* setup accesible card memory range */ 770 ACCW(NV40P_BLIMIT6, (si->ps.memory_size - 1)); 771 ACCW(NV40P_BLIMIT7, (si->ps.memory_size - 1)); 772 } 773 } 774 else /* NV20A and NV30A: */ 775 { 776 /* copy some RAM configuration info(?) */ 777 ACCW(NV20_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); 778 ACCW(NV20_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); 779 /* copy some RAM configuration info(?) to some indexed registers: */ 780 /* b16-24 is select; b2-13 is adress in 32-bit words */ 781 ACCW(RDI_INDEX, 0x00ea0000); 782 /* data is 32-bit */ 783 ACCW(RDI_DATA, NV_REG32(NV32_PFB_CONFIG_0)); 784 /* b16-24 is select; b2-13 is adress in 32-bit words */ 785 ACCW(RDI_INDEX, 0x00ea0004); 786 /* data is 32-bit */ 787 ACCW(RDI_DATA, NV_REG32(NV32_PFB_CONFIG_1)); 788 789 /* setup location of active screen in framebuffer */ 790 ACCW(NV20_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 791 ACCW(NV20_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 792 /* setup accesible card memory range */ 793 ACCW(NV20_BLIMIT6, (si->ps.memory_size - 1)); 794 ACCW(NV20_BLIMIT7, (si->ps.memory_size - 1)); 795 } 796 797 /* NV20A, NV30A and NV40A: */ 798 /* setup some acc engine tile stuff */ 799 ACCW(NV10_TIL2AD, 0x00000000); 800 ACCW(NV10_TIL0ED, 0xffffffff); 801 } 802 803 /* all cards: */ 804 /* setup clipping: rect size is 32768 x 32768, probably max. setting */ 805 /* note: 806 * can also be done via the NV_IMAGE_BLACK_RECTANGLE engine command. */ 807 ACCW(ABS_UCLP_XMIN, 0x00000000); 808 ACCW(ABS_UCLP_YMIN, 0x00000000); 809 ACCW(ABS_UCLP_XMAX, 0x00007fff); 810 ACCW(ABS_UCLP_YMAX, 0x00007fff); 811 812 /*** PFIFO ***/ 813 /* (setup caches) */ 814 /* disable caches reassign */ 815 ACCW(PF_CACHES, 0x00000000); 816 /* PFIFO mode: channel 0 is in DMA mode, channels 1 - 32 are in PIO mode */ 817 ACCW(PF_MODE, 0x00000001); 818 /* cache1 push0 access disabled */ 819 ACCW(PF_CACH1_PSH0, 0x00000000); 820 /* cache1 pull0 access disabled */ 821 ACCW(PF_CACH1_PUL0, 0x00000000); 822 /* cache1 push1 mode = DMA */ 823 if (si->ps.card_arch >= NV40A) 824 ACCW(PF_CACH1_PSH1, 0x00010000); 825 else 826 ACCW(PF_CACH1_PSH1, 0x00000100); 827 /* cache1 DMA Put offset = 0 (b2-28) */ 828 ACCW(PF_CACH1_DMAP, 0x00000000); 829 /* cache1 DMA Get offset = 0 (b2-28) */ 830 ACCW(PF_CACH1_DMAG, 0x00000000); 831 /* cache1 DMA instance adress = $114e (b0-15); 832 * instance being b4-19 with baseadress NV_PRAMIN_CTX_0 (0x00700000). */ 833 /* note: 834 * should point to a DMA definition in CTX register space (which is sort of RAM). 835 * This define tells the engine where the DMA cmd buffer is and what it's size is. 836 * Inside that cmd buffer you'll find the actual issued engine commands. */ 837 if (si->ps.card_arch >= NV40A) 838 ACCW(PF_CACH1_DMAI, 0x0000114e); 839 else 840 ACCW(PF_CACH1_DMAI, 0x0000114c); 841 /* cache0 push0 access disabled */ 842 ACCW(PF_CACH0_PSH0, 0x00000000); 843 /* cache0 pull0 access disabled */ 844 ACCW(PF_CACH0_PUL0, 0x00000000); 845 /* RAM HT (hash table) baseadress = $10000 (b4-8), size = 4k, 846 * search = 128 (is byte offset between hash 'sets') */ 847 /* note: 848 * so HT base is $00710000, last is $00710fff. 849 * In this space you define the engine command handles (HT_HANDL_XX), which 850 * in turn points to the defines in CTX register space (which is sort of RAM) */ 851 ACCW(PF_RAMHT, 0x03000100); 852 /* RAM FC baseadress = $11000 (b3-8) (size is fixed to 0.5k(?)) */ 853 /* note: 854 * so FC base is $00711000, last is $007111ff. (not used?) */ 855 ACCW(PF_RAMFC, 0x00000110); 856 /* RAM RO baseadress = $11200 (b1-8), size = 0.5k */ 857 /* note: 858 * so RO base is $00711200, last is $007113ff. (not used?) */ 859 /* note also: 860 * This means(?) the PRAMIN CTX registers are accessible from base $00711400. */ 861 ACCW(PF_RAMRO, 0x00000112); 862 /* PFIFO size: ch0-15 = 512 bytes, ch16-31 = 124 bytes */ 863 ACCW(PF_SIZE, 0x0000ffff); 864 /* cache1 hash instance = $ffff (b0-15) */ 865 ACCW(PF_CACH1_HASH, 0x0000ffff); 866 /* disable all PFIFO INTs */ 867 ACCW(PF_INTEN, 0x00000000); 868 /* reset all PFIFO INT status bits */ 869 ACCW(PF_INTSTAT, 0xffffffff); 870 /* cache0 pull0 engine = acceleration engine (graphics) */ 871 ACCW(PF_CACH0_PUL1, 0x00000001); 872 /* cache1 DMA control: disable some stuff */ 873 ACCW(PF_CACH1_DMAC, 0x00000000); 874 /* cache1 engine 0 upto/including 7 is software (could also be graphics or DVD) */ 875 ACCW(PF_CACH1_ENG, 0x00000000); 876 /* cache1 DMA fetch: trigger at 128 bytes, size is 32 bytes, max requests is 15, 877 * use little endian */ 878 ACCW(PF_CACH1_DMAF, 0x000f0078); 879 /* cache1 DMA push: b0 = 1: access is enabled */ 880 ACCW(PF_CACH1_DMAS, 0x00000001); 881 /* cache1 push0 access enabled */ 882 ACCW(PF_CACH1_PSH0, 0x00000001); 883 /* cache1 pull0 access enabled */ 884 ACCW(PF_CACH1_PUL0, 0x00000001); 885 /* cache1 pull1 engine = acceleration engine (graphics) */ 886 ACCW(PF_CACH1_PUL1, 0x00000001); 887 /* enable PFIFO caches reassign */ 888 ACCW(PF_CACHES, 0x00000001); 889 890 /* setup 3D specifics */ 891 nv_init_for_3D_dma(); 892 893 /*** init acceleration engine command info ***/ 894 /* set object handles */ 895 /* note: 896 * probably depending on some other setup, there are 8 or 32 FIFO channels 897 * available. Assuming the current setup only has 8 channels because the 'rest' 898 * isn't setup here... */ 899 si->engine.fifo.handle[0] = NV_ROP5_SOLID; 900 si->engine.fifo.handle[1] = NV_IMAGE_BLACK_RECTANGLE; 901 si->engine.fifo.handle[2] = NV_IMAGE_PATTERN; 902 si->engine.fifo.handle[3] = NV4_SURFACE; /* NV10_CONTEXT_SURFACES_2D is identical */ 903 si->engine.fifo.handle[4] = NV_IMAGE_BLIT; 904 si->engine.fifo.handle[5] = NV4_GDI_RECTANGLE_TEXT; 905 //fixme: nolonger switching FIFO assignment for 3D as doing that causes trouble when 906 //overlay is concurrently active!!!! 907 //we can forego switching for now as we had FIFO CH6 still unused... 908 //(note btw: switching has no noticable slowdown: measured 0.2% with Quake2) 909 si->engine.fifo.handle[6] = NV4_CONTEXT_SURFACES_ARGB_ZS;//NV1_RENDER_SOLID_LIN; 910 si->engine.fifo.handle[7] = NV4_DX5_TEXTURE_TRIANGLE; 911 /* preset no FIFO channels assigned to cmd's */ 912 for (cnt = 0; cnt < 0x20; cnt++) 913 { 914 si->engine.fifo.ch_ptr[cnt] = 0; 915 } 916 /* set handle's pointers to their assigned FIFO channels */ 917 /* note: 918 * b0-1 aren't used as adressbits. Using b0 to indicate a valid pointer. */ 919 for (cnt = 0; cnt < 0x08; cnt++) 920 { 921 si->engine.fifo.ch_ptr[(si->engine.fifo.handle[cnt])] = 922 (0x00000001 + (cnt * 0x00002000)); 923 } 924 925 /*** init DMA command buffer info ***/ 926 if (si->ps.card_arch >= NV40A) //main mem DMA buf on pre-NV40 927 { 928 si->dma_buffer = (void *)((char *)si->framebuffer + 929 ((si->ps.memory_size - 1) & 0xffff8000)); 930 } 931 LOG(4,("ACC_DMA: command buffer is at adress $%08x\n", 932 ((uint32)(si->dma_buffer)))); 933 /* we have issued no DMA cmd's to the engine yet */ 934 si->engine.dma.put = 0; 935 /* the current first free adress in the DMA buffer is at offset 0 */ 936 si->engine.dma.current = 0; 937 /* the DMA buffer can hold 8k 32-bit words (it's 32kb in size), 938 * or 256k 32-bit words (1Mb in size) dependant on architecture (for now) */ 939 /* note: 940 * one word is reserved at the end of the DMA buffer to be able to instruct the 941 * engine to do a buffer wrap-around! 942 * (DMA opcode 'noninc method': issue word $20000000.) */ 943 if (si->ps.card_arch < NV40A) 944 si->engine.dma.max = ((1 * 1024 * 1024) >> 2) - 1; 945 else 946 si->engine.dma.max = 8192 - 1; 947 /* note the current free space we have left in the DMA buffer */ 948 si->engine.dma.free = si->engine.dma.max - si->engine.dma.current; 949 950 /*** init FIFO via DMA command buffer. ***/ 951 /* wait for room in fifo for new FIFO assigment cmds if needed: */ 952 if (si->ps.card_arch >= NV40A) 953 { 954 if (nv_acc_fifofree_dma(12) != B_OK) return B_ERROR; 955 } 956 else 957 { 958 if (nv_acc_fifofree_dma(16) != B_OK) return B_ERROR; 959 } 960 961 /* program new FIFO assignments */ 962 /* Raster OPeration: */ 963 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH0, si->engine.fifo.handle[0]); 964 /* Clip: */ 965 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH1, si->engine.fifo.handle[1]); 966 /* Pattern: */ 967 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH2, si->engine.fifo.handle[2]); 968 /* 2D Surfaces: */ 969 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH3, si->engine.fifo.handle[3]); 970 /* Blit: */ 971 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH4, si->engine.fifo.handle[4]); 972 /* Bitmap: */ 973 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH5, si->engine.fifo.handle[5]); 974 if (si->ps.card_arch < NV40A) 975 { 976 /* 3D surfaces: (3D related only) */ 977 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH6, si->engine.fifo.handle[6]); 978 /* Textured Triangle: (3D only) */ 979 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH7, si->engine.fifo.handle[7]); 980 } 981 982 /*** Set pixel width ***/ 983 switch(si->dm.space) 984 { 985 case B_CMAP8: 986 surf_depth = 0x00000001; 987 cmd_depth = 0x00000003; 988 break; 989 case B_RGB15_LITTLE: 990 case B_RGB16_LITTLE: 991 surf_depth = 0x00000004; 992 cmd_depth = 0x00000001; 993 break; 994 case B_RGB32_LITTLE: 995 case B_RGBA32_LITTLE: 996 surf_depth = 0x00000006; 997 cmd_depth = 0x00000003; 998 break; 999 default: 1000 LOG(8,("ACC_DMA: init, invalid bit depth\n")); 1001 return B_ERROR; 1002 } 1003 1004 /* wait for room in fifo for surface setup cmd if needed */ 1005 if (nv_acc_fifofree_dma(5) != B_OK) return B_ERROR; 1006 /* now setup 2D surface (writing 5 32bit words) */ 1007 nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 4); 1008 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = surf_depth; /* Format */ 1009 /* setup screen pitch */ 1010 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1011 ((si->fbc.bytes_per_row & 0x0000ffff) | (si->fbc.bytes_per_row << 16)); /* Pitch */ 1012 /* setup screen location */ 1013 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1014 ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer); /* OffsetSource */ 1015 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1016 ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer); /* OffsetDest */ 1017 1018 /* wait for room in fifo for pattern colordepth setup cmd if needed */ 1019 if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR; 1020 /* set pattern colordepth (writing 2 32bit words) */ 1021 nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETCOLORFORMAT, 1); 1022 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 1023 1024 /* wait for room in fifo for bitmap colordepth setup cmd if needed */ 1025 if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR; 1026 /* set bitmap colordepth (writing 2 32bit words) */ 1027 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_SETCOLORFORMAT, 1); 1028 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 1029 1030 /* Load our pattern into the engine: */ 1031 /* wait for room in fifo for pattern cmd if needed. */ 1032 if (nv_acc_fifofree_dma(7) != B_OK) return B_ERROR; 1033 /* now setup pattern (writing 7 32bit words) */ 1034 nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETSHAPE, 1); 1035 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000000; /* SetShape: 0 = 8x8, 1 = 64x1, 2 = 1x64 */ 1036 nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETCOLOR0, 4); 1037 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetColor0 */ 1038 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetColor1 */ 1039 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetPattern[0] */ 1040 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetPattern[1] */ 1041 1042 /* concurrent overlay/3D 'workaround': 1043 * we _must_ execute a 3D command before overlay is started to prevent a hard 1044 * engine crash! Drawing a small rectangle (Z-only) containing rubbish. */ 1045 /* note: 1046 * 3D only works on pre-NV20 currently... */ 1047 if (si->ps.card_arch < NV20A) 1048 { 1049 /* wait for room in fifo for 3D 'workaround' cmd if needed */ 1050 if (nv_acc_fifofree_dma(50) != B_OK) return B_ERROR; 1051 1052 /* setup fake 3D surfaces: */ 1053 nv_acc_cmd_dma(NV4_CONTEXT_SURFACES_ARGB_ZS, NV4_CONTEXT_SURFACES_ARGB_ZS_PITCH, 3); 1054 /* Set minimum pitch (granularity) required by hardware */ 1055 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 64 | (64 << 16); /* Pitches */ 1056 /* Place colorbuffer in Desktop */ 1057 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1058 ((uint32)si->fbc.frame_buffer - (uint32)si->framebuffer); /* SetOffsetColor */ 1059 /* Place Z-buffer in Desktop */ 1060 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1061 ((uint32)si->fbc.frame_buffer - (uint32)si->framebuffer); /* SetOffsetZeta */ 1062 1063 /* Set a valid 3D state (write Z-buffer only): texture is in Desktop */ 1064 RIVA_STATE3D_05(((uint32)si->fbc.frame_buffer - (uint32)si->framebuffer), 1065 0x11221551, 0x11000000, 0x21100162, 0x41186800); 1066 /* Enter a small two dimensional quad */ 1067 RIVA_VERTEX3D_05(0, 0, 0); 1068 RIVA_VERTEX3D_05(1, 16, 0); 1069 RIVA_VERTEX3D_05(2, 16, 16); 1070 RIVA_VERTEX3D_05(3, 0, 16); 1071 /* Render quad */ 1072 RIVA_DRAWQUAD3D_05(0, 1, 2, 3); 1073 } 1074 1075 /* tell the engine to fetch and execute all (new) commands in the DMA buffer */ 1076 nv_start_dma(); 1077 1078 return B_OK; 1079 } 1080 1081 static void nv_init_for_3D_dma(void) 1082 { 1083 /* setup PGRAPH unknown registers and modify (pre-cleared) pipe stuff for 3D use */ 1084 if (si->ps.card_arch >= NV10A) 1085 { 1086 /* setup unknown PGRAPH stuff */ 1087 ACCW(PGWHAT_00, 0x00000000); 1088 ACCW(PGWHAT_01, 0x00000000); 1089 ACCW(PGWHAT_02, 0x00000000); 1090 ACCW(PGWHAT_03, 0x00000000); 1091 1092 ACCW(PGWHAT_04, 0x00001000); 1093 ACCW(PGWHAT_05, 0x00001000); 1094 ACCW(PGWHAT_06, 0x4003ff80); 1095 1096 ACCW(PGWHAT_07, 0x00000000); 1097 ACCW(PGWHAT_08, 0x00000000); 1098 ACCW(PGWHAT_09, 0x00000000); 1099 ACCW(PGWHAT_0A, 0x00000000); 1100 ACCW(PGWHAT_0B, 0x00000000); 1101 1102 ACCW(PGWHAT_0C, 0x00080008); 1103 ACCW(PGWHAT_0D, 0x00080008); 1104 1105 ACCW(PGWHAT_0E, 0x00000000); 1106 ACCW(PGWHAT_0F, 0x00000000); 1107 ACCW(PGWHAT_10, 0x00000000); 1108 ACCW(PGWHAT_11, 0x00000000); 1109 ACCW(PGWHAT_12, 0x00000000); 1110 ACCW(PGWHAT_13, 0x00000000); 1111 ACCW(PGWHAT_14, 0x00000000); 1112 ACCW(PGWHAT_15, 0x00000000); 1113 ACCW(PGWHAT_16, 0x00000000); 1114 ACCW(PGWHAT_17, 0x00000000); 1115 ACCW(PGWHAT_18, 0x00000000); 1116 1117 ACCW(PGWHAT_19, 0x10000000); 1118 1119 ACCW(PGWHAT_1A, 0x00000000); 1120 ACCW(PGWHAT_1B, 0x00000000); 1121 ACCW(PGWHAT_1C, 0x00000000); 1122 ACCW(PGWHAT_1D, 0x00000000); 1123 ACCW(PGWHAT_1E, 0x00000000); 1124 ACCW(PGWHAT_1F, 0x00000000); 1125 ACCW(PGWHAT_20, 0x00000000); 1126 ACCW(PGWHAT_21, 0x00000000); 1127 1128 ACCW(PGWHAT_22, 0x08000000); 1129 1130 ACCW(PGWHAT_23, 0x00000000); 1131 ACCW(PGWHAT_24, 0x00000000); 1132 ACCW(PGWHAT_25, 0x00000000); 1133 ACCW(PGWHAT_26, 0x00000000); 1134 1135 ACCW(PGWHAT_27, 0x4b7fffff); 1136 1137 ACCW(PGWHAT_28, 0x00000000); 1138 ACCW(PGWHAT_29, 0x00000000); 1139 ACCW(PGWHAT_2A, 0x00000000); 1140 1141 /* setup window clipping */ 1142 /* b0-11 = min; b16-27 = max. 1143 * note: 1144 * probably two's complement values, so setting to max range here: 1145 * which would be -2048 upto/including +2047. */ 1146 /* horizontal */ 1147 ACCW(WINCLIP_H_0, 0x07ff0800); 1148 ACCW(WINCLIP_H_1, 0x07ff0800); 1149 ACCW(WINCLIP_H_2, 0x07ff0800); 1150 ACCW(WINCLIP_H_3, 0x07ff0800); 1151 ACCW(WINCLIP_H_4, 0x07ff0800); 1152 ACCW(WINCLIP_H_5, 0x07ff0800); 1153 ACCW(WINCLIP_H_6, 0x07ff0800); 1154 ACCW(WINCLIP_H_7, 0x07ff0800); 1155 /* vertical */ 1156 ACCW(WINCLIP_V_0, 0x07ff0800); 1157 ACCW(WINCLIP_V_1, 0x07ff0800); 1158 ACCW(WINCLIP_V_2, 0x07ff0800); 1159 ACCW(WINCLIP_V_3, 0x07ff0800); 1160 ACCW(WINCLIP_V_4, 0x07ff0800); 1161 ACCW(WINCLIP_V_5, 0x07ff0800); 1162 ACCW(WINCLIP_V_6, 0x07ff0800); 1163 ACCW(WINCLIP_V_7, 0x07ff0800); 1164 1165 /* setup (initialize) pipe: 1166 * needed to get valid 3D rendering on (at least) NV1x cards. Without this 1167 * those cards produce rubbish instead of 3D, although the engine itself keeps 1168 * running and 2D stays OK. */ 1169 1170 /* set eyetype to local, lightning etc. is off */ 1171 ACCW(NV10_XFMOD0, 0x10000000); 1172 /* disable all lights */ 1173 ACCW(NV10_XFMOD1, 0x00000000); 1174 1175 /* note: upon writing data into the PIPEDAT register, the PIPEADR is 1176 * probably auto-incremented! */ 1177 /* (pipe adress = b2-16, pipe data = b0-31) */ 1178 /* note: pipe adresses IGRAPH registers? */ 1179 ACCW(NV10_PIPEADR, 0x00006740); 1180 ACCW(NV10_PIPEDAT, 0x00000000); 1181 ACCW(NV10_PIPEDAT, 0x00000000); 1182 ACCW(NV10_PIPEDAT, 0x00000000); 1183 ACCW(NV10_PIPEDAT, 0x3f800000); 1184 1185 ACCW(NV10_PIPEADR, 0x00006750); 1186 ACCW(NV10_PIPEDAT, 0x40000000); 1187 ACCW(NV10_PIPEDAT, 0x40000000); 1188 ACCW(NV10_PIPEDAT, 0x40000000); 1189 ACCW(NV10_PIPEDAT, 0x40000000); 1190 1191 ACCW(NV10_PIPEADR, 0x00006760); 1192 ACCW(NV10_PIPEDAT, 0x00000000); 1193 ACCW(NV10_PIPEDAT, 0x00000000); 1194 ACCW(NV10_PIPEDAT, 0x3f800000); 1195 ACCW(NV10_PIPEDAT, 0x00000000); 1196 1197 ACCW(NV10_PIPEADR, 0x00006770); 1198 ACCW(NV10_PIPEDAT, 0xc5000000); 1199 ACCW(NV10_PIPEDAT, 0xc5000000); 1200 ACCW(NV10_PIPEDAT, 0x00000000); 1201 ACCW(NV10_PIPEDAT, 0x00000000); 1202 1203 ACCW(NV10_PIPEADR, 0x00006780); 1204 ACCW(NV10_PIPEDAT, 0x00000000); 1205 ACCW(NV10_PIPEDAT, 0x00000000); 1206 ACCW(NV10_PIPEDAT, 0x3f800000); 1207 ACCW(NV10_PIPEDAT, 0x00000000); 1208 1209 ACCW(NV10_PIPEADR, 0x000067a0); 1210 ACCW(NV10_PIPEDAT, 0x3f800000); 1211 ACCW(NV10_PIPEDAT, 0x3f800000); 1212 ACCW(NV10_PIPEDAT, 0x3f800000); 1213 ACCW(NV10_PIPEDAT, 0x3f800000); 1214 1215 ACCW(NV10_PIPEADR, 0x00006ab0); 1216 ACCW(NV10_PIPEDAT, 0x3f800000); 1217 ACCW(NV10_PIPEDAT, 0x3f800000); 1218 ACCW(NV10_PIPEDAT, 0x3f800000); 1219 1220 ACCW(NV10_PIPEADR, 0x00006ac0); 1221 ACCW(NV10_PIPEDAT, 0x00000000); 1222 ACCW(NV10_PIPEDAT, 0x00000000); 1223 ACCW(NV10_PIPEDAT, 0x00000000); 1224 1225 ACCW(NV10_PIPEADR, 0x00006c10); 1226 ACCW(NV10_PIPEDAT, 0xbf800000); 1227 1228 ACCW(NV10_PIPEADR, 0x00007030); 1229 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1230 1231 ACCW(NV10_PIPEADR, 0x00007040); 1232 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1233 1234 ACCW(NV10_PIPEADR, 0x00007050); 1235 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1236 1237 ACCW(NV10_PIPEADR, 0x00007060); 1238 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1239 1240 ACCW(NV10_PIPEADR, 0x00007070); 1241 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1242 1243 ACCW(NV10_PIPEADR, 0x00007080); 1244 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1245 1246 ACCW(NV10_PIPEADR, 0x00007090); 1247 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1248 1249 ACCW(NV10_PIPEADR, 0x000070a0); 1250 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1251 1252 ACCW(NV10_PIPEADR, 0x00006a80); 1253 ACCW(NV10_PIPEDAT, 0x00000000); 1254 ACCW(NV10_PIPEDAT, 0x00000000); 1255 ACCW(NV10_PIPEDAT, 0x3f800000); 1256 1257 ACCW(NV10_PIPEADR, 0x00006aa0); 1258 ACCW(NV10_PIPEDAT, 0x00000000); 1259 ACCW(NV10_PIPEDAT, 0x00000000); 1260 ACCW(NV10_PIPEDAT, 0x00000000); 1261 1262 ACCW(NV10_PIPEADR, 0x00000040); 1263 ACCW(NV10_PIPEDAT, 0x00000005); 1264 1265 ACCW(NV10_PIPEADR, 0x00006400); 1266 ACCW(NV10_PIPEDAT, 0x3f800000); 1267 ACCW(NV10_PIPEDAT, 0x3f800000); 1268 ACCW(NV10_PIPEDAT, 0x4b7fffff); 1269 ACCW(NV10_PIPEDAT, 0x00000000); 1270 1271 ACCW(NV10_PIPEADR, 0x00006410); 1272 ACCW(NV10_PIPEDAT, 0xc5000000); 1273 ACCW(NV10_PIPEDAT, 0xc5000000); 1274 ACCW(NV10_PIPEDAT, 0x00000000); 1275 ACCW(NV10_PIPEDAT, 0x00000000); 1276 1277 ACCW(NV10_PIPEADR, 0x00006420); 1278 ACCW(NV10_PIPEDAT, 0x00000000); 1279 ACCW(NV10_PIPEDAT, 0x00000000); 1280 ACCW(NV10_PIPEDAT, 0x00000000); 1281 ACCW(NV10_PIPEDAT, 0x00000000); 1282 1283 ACCW(NV10_PIPEADR, 0x00006430); 1284 ACCW(NV10_PIPEDAT, 0x00000000); 1285 ACCW(NV10_PIPEDAT, 0x00000000); 1286 ACCW(NV10_PIPEDAT, 0x00000000); 1287 ACCW(NV10_PIPEDAT, 0x00000000); 1288 1289 ACCW(NV10_PIPEADR, 0x000064c0); 1290 ACCW(NV10_PIPEDAT, 0x3f800000); 1291 ACCW(NV10_PIPEDAT, 0x3f800000); 1292 ACCW(NV10_PIPEDAT, 0x477fffff); 1293 ACCW(NV10_PIPEDAT, 0x3f800000); 1294 1295 ACCW(NV10_PIPEADR, 0x000064d0); 1296 ACCW(NV10_PIPEDAT, 0xc5000000); 1297 ACCW(NV10_PIPEDAT, 0xc5000000); 1298 ACCW(NV10_PIPEDAT, 0x00000000); 1299 ACCW(NV10_PIPEDAT, 0x00000000); 1300 1301 ACCW(NV10_PIPEADR, 0x000064e0); 1302 ACCW(NV10_PIPEDAT, 0xc4fff000); 1303 ACCW(NV10_PIPEDAT, 0xc4fff000); 1304 ACCW(NV10_PIPEDAT, 0x00000000); 1305 ACCW(NV10_PIPEDAT, 0x00000000); 1306 1307 ACCW(NV10_PIPEADR, 0x000064f0); 1308 ACCW(NV10_PIPEDAT, 0x00000000); 1309 ACCW(NV10_PIPEDAT, 0x00000000); 1310 ACCW(NV10_PIPEDAT, 0x00000000); 1311 ACCW(NV10_PIPEDAT, 0x00000000); 1312 1313 /* turn lightning on */ 1314 ACCW(NV10_XFMOD0, 0x30000000); 1315 /* set light 1 to infinite type, other lights remain off */ 1316 ACCW(NV10_XFMOD1, 0x00000004); 1317 1318 /* Z-buffer state is: 1319 * initialized, set to: 'fixed point' (integer?); Z-buffer; 16bits depth */ 1320 /* note: 1321 * other options possible are: floating point; 24bits depth; W-buffer(?) */ 1322 ACCW(GLOB_STAT_0, 0x10000000); 1323 /* set DMA instance 2 and 3 to be invalid */ 1324 ACCW(GLOB_STAT_1, 0x00000000); 1325 } 1326 } 1327 1328 static void nv_start_dma(void) 1329 { 1330 uint32 dummy; 1331 1332 if (si->engine.dma.current != si->engine.dma.put) 1333 { 1334 si->engine.dma.put = si->engine.dma.current; 1335 /* flush used caches so we know for sure the DMA cmd buffer received all data. */ 1336 if (si->ps.card_arch < NV40A) 1337 { 1338 /* some CPU's support out-of-order processing (WinChip/Cyrix). Flush them. */ 1339 __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory"); 1340 /* read a non-cached adress to flush the cash */ 1341 dummy = ACCR(STATUS); 1342 } 1343 else 1344 { 1345 /* dummy read the first adress of the framebuffer to flush MTRR-WC buffers */ 1346 dummy = *((volatile uint32 *)(si->framebuffer)); 1347 } 1348 1349 /* actually start DMA to execute all commands now in buffer */ 1350 /* note: 1351 * it doesn't matter which FIFO channel's DMA registers we access, they are in 1352 * fact all the same set. It also doesn't matter if the channel was assigned a 1353 * command or not. */ 1354 /* note also: 1355 * NV_GENERAL_DMAPUT is a write-only register on some cards (confirmed NV11). */ 1356 NV_REG32(NVACC_FIFO + NV_GENERAL_DMAPUT) = (si->engine.dma.put << 2); 1357 } 1358 } 1359 1360 /* this routine does not check the engine's internal hardware FIFO, but the DMA 1361 * command buffer. You can see this as a FIFO as well, that feeds the hardware FIFO. 1362 * The hardware FIFO state is checked by the DMA hardware automatically. */ 1363 static status_t nv_acc_fifofree_dma(uint16 cmd_size) 1364 { 1365 uint32 dmaget; 1366 1367 /* we'd better check for timeouts on the DMA engine as it's theoretically 1368 * breakable by malfunctioning software */ 1369 uint16 cnt = 0; 1370 1371 /* check if the DMA buffer has enough room for the command. 1372 * note: 1373 * engine.dma.free is 'cached' */ 1374 while ((si->engine.dma.free < cmd_size) && (cnt < 10000) && (err < 3)) 1375 { 1376 /* see where the engine is currently fetching from the buffer */ 1377 /* note: 1378 * read this only once in the code as accessing registers is relatively slow */ 1379 /* note also: 1380 * it doesn't matter which FIFO channel's DMA registers we access, they are in 1381 * fact all the same set. It also doesn't matter if the channel was assigned a 1382 * command or not. */ 1383 dmaget = ((NV_REG32(NVACC_FIFO + NV_GENERAL_DMAGET)) >> 2); 1384 1385 /* update timeout counter: on NV11 on a Pentium4 2.8Ghz max reached count 1386 * using BeRoMeter 1.2.6 was about 600; so counting 10000 before generating 1387 * a timeout should definately do it. Snooze()-ing cannot be done without a 1388 * serious speed penalty, even if done for only 1 microSecond. */ 1389 cnt++; 1390 1391 /* where's the engine fetching viewed from us issuing? */ 1392 if (si->engine.dma.put >= dmaget) 1393 { 1394 /* engine is fetching 'behind us', the last piece of the buffer is free */ 1395 1396 /* note the 'updated' free space we have in the DMA buffer */ 1397 si->engine.dma.free = si->engine.dma.max - si->engine.dma.current; 1398 /* if it's enough after all we exit this routine immediately. Else: */ 1399 if (si->engine.dma.free < cmd_size) 1400 { 1401 /* not enough room left, so instruct DMA engine to reset the buffer 1402 * when it's reaching the end of it */ 1403 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x20000000; 1404 /* reset our buffer pointer, so new commands will be placed at the 1405 * beginning of the buffer. */ 1406 si->engine.dma.current = 0; 1407 /* tell the engine to fetch the remaining command(s) in the DMA buffer 1408 * that where not executed before. */ 1409 nv_start_dma(); 1410 1411 /* NOW the engine is fetching 'in front of us', so the first piece 1412 * of the buffer is free */ 1413 1414 /* note the updated current free space we have in the DMA buffer */ 1415 si->engine.dma.free = dmaget - si->engine.dma.current; 1416 /* mind this pittfall: 1417 * Leave some room between where the engine is fetching and where we 1418 * put new commands. Otherwise the engine will crash on heavy loads. 1419 * A crash can be forced best in 640x480x32 mode with BeRoMeter 1.2.6. 1420 * (confirmed on NV11 and NV43 with less than 256 words forced freespace.) 1421 * Note: 1422 * The engine is DMA triggered for fetching chunks every 128 bytes, 1423 * maybe this is the reason for this behaviour. 1424 * Note also: 1425 * it looks like the space that needs to be kept free is coupled 1426 * with the size of the DMA buffer. */ 1427 if (si->engine.dma.free < 256) 1428 si->engine.dma.free = 0; 1429 else 1430 si->engine.dma.free -= 256; 1431 } 1432 } 1433 else 1434 { 1435 /* engine is fetching 'in front of us', so the first piece of the buffer 1436 * is free */ 1437 1438 /* note the updated current free space we have in the DMA buffer */ 1439 si->engine.dma.free = dmaget - si->engine.dma.current; 1440 /* mind this pittfall: 1441 * Leave some room between where the engine is fetching and where we 1442 * put new commands. Otherwise the engine will crash on heavy loads. 1443 * A crash can be forced best in 640x480x32 mode with BeRoMeter 1.2.6. 1444 * (confirmed on NV11 and NV43 with less than 256 words forced freespace.) 1445 * Note: 1446 * The engine is DMA triggered for fetching chunks every 128 bytes, 1447 * maybe this is the reason for this behaviour. 1448 * Note also: 1449 * it looks like the space that needs to be kept free is coupled 1450 * with the size of the DMA buffer. */ 1451 if (si->engine.dma.free < 256) 1452 si->engine.dma.free = 0; 1453 else 1454 si->engine.dma.free -= 256; 1455 } 1456 } 1457 1458 /* log timeout if we had one */ 1459 if (cnt == 10000) 1460 { 1461 if (err < 3) err++; 1462 LOG(4,("ACC_DMA: fifofree; DMA timeout #%d, engine trouble!\n", err)); 1463 } 1464 1465 /* we must make the acceleration routines abort or the driver will hang! */ 1466 if (err >= 3) return B_ERROR; 1467 1468 return B_OK; 1469 } 1470 1471 static void nv_acc_cmd_dma(uint32 cmd, uint16 offset, uint16 size) 1472 { 1473 /* NV_FIFO_DMA_OPCODE: set number of cmd words (b18 - 28); set FIFO offset for 1474 * first cmd word (b2 - 15); set DMA opcode = method (b29 - 31). 1475 * a 'NOP' is the opcode word $00000000. */ 1476 /* note: 1477 * possible DMA opcodes: 1478 * b'000' is 'method' (execute cmd); 1479 * b'001' is 'jump'; 1480 * b'002' is 'noninc method' (execute buffer wrap-around); 1481 * b'003' is 'call': return is executed by opcode word $00020000 (b17 = 1). */ 1482 /* note also: 1483 * this system uses auto-increments for the FIFO offset adresses. Make sure 1484 * to set a new adress if a gap exists between the previous one and the new one. */ 1485 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = ((size << 18) | 1486 ((si->engine.fifo.ch_ptr[cmd] + offset) & 0x0000fffc)); 1487 1488 /* space left after issuing the current command is the cmd AND it's arguments less */ 1489 si->engine.dma.free -= (size + 1); 1490 } 1491 1492 static void nv_acc_set_ch_dma(uint16 ch, uint32 handle) 1493 { 1494 /* issue FIFO channel assign cmd */ 1495 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = ((1 << 18) | ch); 1496 /* set new assignment */ 1497 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = (0x80000000 | handle); 1498 1499 /* space left after issuing the current command is the cmd AND it's arguments less */ 1500 si->engine.dma.free -= 2; 1501 } 1502 1503 void nv_acc_assert_fifo_dma(void) 1504 { 1505 /* does every engine cmd this accelerant needs have a FIFO channel? */ 1506 //fixme: can probably be optimized for both speed and channel selection... 1507 if (!si->engine.fifo.ch_ptr[NV_ROP5_SOLID] || 1508 !si->engine.fifo.ch_ptr[NV_IMAGE_BLACK_RECTANGLE] || 1509 !si->engine.fifo.ch_ptr[NV_IMAGE_PATTERN] || 1510 !si->engine.fifo.ch_ptr[NV4_SURFACE] || 1511 !si->engine.fifo.ch_ptr[NV_IMAGE_BLIT] || 1512 !si->engine.fifo.ch_ptr[NV4_GDI_RECTANGLE_TEXT]) 1513 { 1514 uint16 cnt; 1515 1516 /* free the FIFO channels we want from the currently assigned cmd's */ 1517 si->engine.fifo.ch_ptr[si->engine.fifo.handle[0]] = 0; 1518 si->engine.fifo.ch_ptr[si->engine.fifo.handle[1]] = 0; 1519 si->engine.fifo.ch_ptr[si->engine.fifo.handle[2]] = 0; 1520 si->engine.fifo.ch_ptr[si->engine.fifo.handle[3]] = 0; 1521 si->engine.fifo.ch_ptr[si->engine.fifo.handle[4]] = 0; 1522 si->engine.fifo.ch_ptr[si->engine.fifo.handle[5]] = 0; 1523 1524 /* set new object handles */ 1525 si->engine.fifo.handle[0] = NV_ROP5_SOLID; 1526 si->engine.fifo.handle[1] = NV_IMAGE_BLACK_RECTANGLE; 1527 si->engine.fifo.handle[2] = NV_IMAGE_PATTERN; 1528 si->engine.fifo.handle[3] = NV4_SURFACE; 1529 si->engine.fifo.handle[4] = NV_IMAGE_BLIT; 1530 si->engine.fifo.handle[5] = NV4_GDI_RECTANGLE_TEXT; 1531 1532 /* set handle's pointers to their assigned FIFO channels */ 1533 /* note: 1534 * b0-1 aren't used as adressbits. Using b0 to indicate a valid pointer. */ 1535 for (cnt = 0; cnt < 0x08; cnt++) 1536 { 1537 si->engine.fifo.ch_ptr[(si->engine.fifo.handle[cnt])] = 1538 (0x00000001 + (cnt * 0x00002000)); 1539 } 1540 1541 /* wait for room in fifo for new FIFO assigment cmds if needed. */ 1542 if (nv_acc_fifofree_dma(12) != B_OK) return; 1543 1544 /* program new FIFO assignments */ 1545 /* Raster OPeration: */ 1546 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH0, si->engine.fifo.handle[0]); 1547 /* Clip: */ 1548 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH1, si->engine.fifo.handle[1]); 1549 /* Pattern: */ 1550 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH2, si->engine.fifo.handle[2]); 1551 /* 2D Surface: */ 1552 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH3, si->engine.fifo.handle[3]); 1553 /* Blit: */ 1554 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH4, si->engine.fifo.handle[4]); 1555 /* Bitmap: */ 1556 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH5, si->engine.fifo.handle[5]); 1557 1558 /* tell the engine to fetch and execute all (new) commands in the DMA buffer */ 1559 nv_start_dma(); 1560 } 1561 } 1562 1563 /* 1564 note: 1565 moved acceleration 'top-level' routines to be integrated in the engine: 1566 it is costly to call the engine for every single function within a loop! 1567 (measured with BeRoMeter 1.2.6: upto 15% speed increase on all CPU's.) 1568 1569 note also: 1570 splitting up each command list into sublists (see routines below) prevents 1571 a lot more nested calls, further increasing the speed with upto 70%. 1572 1573 finally: 1574 sending the sublist to just one single engine command even further increases 1575 speed with upto another 10%. This can't be done for blits though, as this engine- 1576 command's hardware does not support multiple objects. 1577 */ 1578 1579 /* screen to screen blit - i.e. move windows around and scroll within them. */ 1580 void SCREEN_TO_SCREEN_BLIT_DMA(engine_token *et, blit_params *list, uint32 count) 1581 { 1582 uint32 i = 0; 1583 uint16 subcnt; 1584 1585 /*** init acc engine for blit function ***/ 1586 /* ROP registers (Raster OPeration): 1587 * wait for room in fifo for ROP cmd if needed. */ 1588 if (nv_acc_fifofree_dma(2) != B_OK) return; 1589 /* now setup ROP (writing 2 32bit words) for GXcopy */ 1590 nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); 1591 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xcc; /* SetRop5 */ 1592 1593 /*** do each blit ***/ 1594 /* Note: 1595 * blit-copy direction is determined inside nvidia hardware: no setup needed */ 1596 while (count) 1597 { 1598 /* break up the list in sublists to minimize calls, while making sure long 1599 * lists still get executed without trouble */ 1600 subcnt = 32; 1601 if (count < 32) subcnt = count; 1602 count -= subcnt; 1603 1604 /* wait for room in fifo for blit cmd if needed. */ 1605 if (nv_acc_fifofree_dma(4 * subcnt) != B_OK) return; 1606 1607 while (subcnt--) 1608 { 1609 /* now setup blit (writing 4 32bit words) */ 1610 nv_acc_cmd_dma(NV_IMAGE_BLIT, NV_IMAGE_BLIT_SOURCEORG, 3); 1611 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1612 (((list[i].src_top) << 16) | (list[i].src_left)); /* SourceOrg */ 1613 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1614 (((list[i].dest_top) << 16) | (list[i].dest_left)); /* DestOrg */ 1615 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1616 ((((list[i].height) + 1) << 16) | ((list[i].width) + 1)); /* HeightWidth */ 1617 1618 i++; 1619 } 1620 1621 /* tell the engine to fetch the commands in the DMA buffer that where not 1622 * executed before. */ 1623 nv_start_dma(); 1624 } 1625 1626 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 1627 si->engine.threeD.reload = 0xffffffff; 1628 } 1629 1630 /* rectangle fill - i.e. workspace and window background color */ 1631 void FILL_RECTANGLE_DMA(engine_token *et, uint32 colorIndex, fill_rect_params *list, uint32 count) 1632 { 1633 uint32 i = 0; 1634 uint16 subcnt; 1635 1636 /*** init acc engine for fill function ***/ 1637 /* ROP registers (Raster OPeration): 1638 * wait for room in fifo for ROP and bitmap cmd if needed. */ 1639 if (nv_acc_fifofree_dma(4) != B_OK) return; 1640 /* now setup ROP (writing 2 32bit words) for GXcopy */ 1641 nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); 1642 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xcc; /* SetRop5 */ 1643 /* now setup fill color (writing 2 32bit words) */ 1644 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); 1645 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = colorIndex; /* Color1A */ 1646 1647 /*** draw each rectangle ***/ 1648 while (count) 1649 { 1650 /* break up the list in sublists to minimize calls, while making sure long 1651 * lists still get executed without trouble */ 1652 subcnt = 32; 1653 if (count < 32) subcnt = count; 1654 count -= subcnt; 1655 1656 /* wait for room in fifo for bitmap cmd if needed. */ 1657 if (nv_acc_fifofree_dma(1 + (2 * subcnt)) != B_OK) return; 1658 1659 /* issue fill command once... */ 1660 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, (2 * subcnt)); 1661 /* ... and send multiple rects (engine cmd supports 32 max) */ 1662 while (subcnt--) 1663 { 1664 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1665 (((list[i].left) << 16) | ((list[i].top) & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */ 1666 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1667 (((((list[i].right)+1) - (list[i].left)) << 16) | 1668 (((list[i].bottom-list[i].top)+1) & 0x0000ffff)); /* Unclipped Rect 0 WidthHeight */ 1669 1670 i++; 1671 } 1672 1673 /* tell the engine to fetch the commands in the DMA buffer that where not 1674 * executed before. */ 1675 nv_start_dma(); 1676 } 1677 1678 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 1679 si->engine.threeD.reload = 0xffffffff; 1680 } 1681 1682 /* span fill - i.e. (selected) menuitem background color (Dano) */ 1683 void FILL_SPAN_DMA(engine_token *et, uint32 colorIndex, uint16 *list, uint32 count) 1684 { 1685 uint32 i = 0; 1686 uint16 subcnt; 1687 1688 /*** init acc engine for fill function ***/ 1689 /* ROP registers (Raster OPeration): 1690 * wait for room in fifo for ROP and bitmap cmd if needed. */ 1691 if (nv_acc_fifofree_dma(4) != B_OK) return; 1692 /* now setup ROP (writing 2 32bit words) for GXcopy */ 1693 nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); 1694 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xcc; /* SetRop5 */ 1695 /* now setup fill color (writing 2 32bit words) */ 1696 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); 1697 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = colorIndex; /* Color1A */ 1698 1699 /*** draw each span ***/ 1700 while (count) 1701 { 1702 /* break up the list in sublists to minimize calls, while making sure long 1703 * lists still get executed without trouble */ 1704 subcnt = 32; 1705 if (count < 32) subcnt = count; 1706 count -= subcnt; 1707 1708 /* wait for room in fifo for bitmap cmd if needed. */ 1709 if (nv_acc_fifofree_dma(1 + (2 * subcnt)) != B_OK) return; 1710 1711 /* issue fill command once... */ 1712 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, (2 * subcnt)); 1713 /* ... and send multiple rects (spans) (engine cmd supports 32 max) */ 1714 while (subcnt--) 1715 { 1716 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1717 (((list[i+1]) << 16) | ((list[i]) & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */ 1718 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1719 ((((list[i+2]+1) - (list[i+1])) << 16) | 0x00000001); /* Unclipped Rect 0 WidthHeight */ 1720 1721 i+=3; 1722 } 1723 1724 /* tell the engine to fetch the commands in the DMA buffer that where not 1725 * executed before. */ 1726 nv_start_dma(); 1727 } 1728 1729 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 1730 si->engine.threeD.reload = 0xffffffff; 1731 } 1732 1733 /* rectangle invert - i.e. text cursor and text selection */ 1734 void INVERT_RECTANGLE_DMA(engine_token *et, fill_rect_params *list, uint32 count) 1735 { 1736 uint32 i = 0; 1737 uint16 subcnt; 1738 1739 /*** init acc engine for invert function ***/ 1740 /* ROP registers (Raster OPeration): 1741 * wait for room in fifo for ROP and bitmap cmd if needed. */ 1742 if (nv_acc_fifofree_dma(4) != B_OK) return; 1743 /* now setup ROP (writing 2 32bit words) for GXinvert */ 1744 nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); 1745 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x55; /* SetRop5 */ 1746 /* now reset fill color (writing 2 32bit words) */ 1747 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); 1748 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000000; /* Color1A */ 1749 1750 /*** invert each rectangle ***/ 1751 while (count) 1752 { 1753 /* break up the list in sublists to minimize calls, while making sure long 1754 * lists still get executed without trouble */ 1755 subcnt = 32; 1756 if (count < 32) subcnt = count; 1757 count -= subcnt; 1758 1759 /* wait for room in fifo for bitmap cmd if needed. */ 1760 if (nv_acc_fifofree_dma(1 + (2 * subcnt)) != B_OK) return; 1761 1762 /* issue fill command once... */ 1763 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, (2 * subcnt)); 1764 /* ... and send multiple rects (engine cmd supports 32 max) */ 1765 while (subcnt--) 1766 { 1767 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1768 (((list[i].left) << 16) | ((list[i].top) & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */ 1769 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1770 (((((list[i].right)+1) - (list[i].left)) << 16) | 1771 (((list[i].bottom-list[i].top)+1) & 0x0000ffff)); /* Unclipped Rect 0 WidthHeight */ 1772 1773 i++; 1774 } 1775 1776 /* tell the engine to fetch the commands in the DMA buffer that where not 1777 * executed before. */ 1778 nv_start_dma(); 1779 } 1780 1781 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 1782 si->engine.threeD.reload = 0xffffffff; 1783 } 1784