1 /* NV Acceleration functions */ 2 3 /* Author: 4 Rudolf Cornelissen 8/2003-6/2010. 5 6 This code was possible thanks to: 7 - the Linux XFree86 NV driver, 8 - the Linux UtahGLX 3D driver. 9 */ 10 11 #define MODULE_BIT 0x00080000 12 13 #include "nv_std.h" 14 15 /*acceleration notes*/ 16 17 /*functions Be's app_server uses: 18 fill span (horizontal only) 19 fill rectangle (these 2 are very similar) 20 invert rectangle 21 blit 22 */ 23 24 static void nv_init_for_3D_dma(void); 25 static void nv_start_dma(void); 26 static status_t nv_acc_fifofree_dma(uint16 cmd_size); 27 static void nv_acc_cmd_dma(uint32 cmd, uint16 offset, uint16 size); 28 static void nv_acc_set_ch_dma(uint16 ch, uint32 handle); 29 30 /* used to track engine DMA stalls: counts the DMA timeouts seen by nv_acc_wait_idle_dma(). It is incremented once per timeout, stops counting at 3, and is reset to 0 by nv_acc_init_dma(). */ 31 static uint8 err; 32 33 /* Wait until the engine is completely idle. This is done in two steps: first wait (with a timeout) until the DMA GET register equals our DMA put position, then wait until ACCR(STATUS) reads zero. Always returns B_OK, also when the DMA wait timed out; a timeout is only logged and counted in 'err'. */ 34 status_t nv_acc_wait_idle_dma() 35 { 36 /* we'd better check for timeouts on the DMA engine as it's theoretically 37 * breakable by malfunctioning software */ 38 uint16 cnt = 0; 39 40 /* wait until all upcoming commands are in execution at least. Do this until 41 * we hit a timeout; abort if we failed at least three times before: 42 * if DMA stalls, we have to forget about it altogether at some point, or 43 * the system will almost come to a complete halt.. */ 44 /* note: 45 * it doesn't matter which FIFO channel's DMA registers we access, they are in 46 * fact all the same set. It also doesn't matter if the channel was assigned a 47 * command or not. 
*/ /* poll at most 10000 times, snoozing 100 between polls (roughly one second in total if snooze() takes microseconds - TODO confirm). Once 'err' has reached 3 this loop is not entered at all. The put position is shifted left by 2 before comparing; presumably it is kept as a 32-bit word index while DMAGET holds a byte offset - TODO confirm. */ 48 while ((NV_REG32(NVACC_FIFO + NV_GENERAL_DMAGET) != (si->engine.dma.put << 2)) && 49 (cnt < 10000) && (err < 3)) 50 { 51 /* snooze a bit so I do not hammer the bus */ 52 snooze (100); 53 cnt++; 54 } 55 56 /* log timeout if we had one: cnt only reaches 10000 when the loop above ran out of polls. The counter 'err' is raised before logging, but never beyond 3. */ 57 if (cnt == 10000) 58 { 59 if (err < 3) err++; 60 LOG(4,("ACC_DMA: wait_idle; DMA timeout #%d, engine trouble!\n", err)); 61 } 62 63 /* wait until execution completed: poll ACCR(STATUS) until it reads zero. Note that, unlike the DMA wait above, this loop has no timeout. */ 64 while (ACCR(STATUS)) 65 { 66 /* snooze a bit so I do not hammer the bus */ 67 snooze (100); 68 } 69 70 return B_OK; 71 } 72 73 /* AFAIK this must be done for every new screenmode. 74 * Engine required init. */ 75 status_t nv_acc_init_dma() 76 { 77 uint32 cnt, tmp; 78 uint32 surf_depth, cmd_depth; 79 /* reset the engine DMA stalls counter */ 80 err = 0; 81 82 /* a hanging engine only recovers from a complete power-down/power-up cycle */ 83 NV_REG32(NV32_PWRUPCTRL) = 0xffff00ff; 84 snooze(1000); 85 NV_REG32(NV32_PWRUPCTRL) = 0xffffffff; 86 87 /* don't try this on NV20 and later.. */ 88 /* note: 89 * the specific register that's responsible for the speedfix on NV18 is 90 * $00400ed8: bit 6 needs to be zero for fastest rendering (confirmed). */ 91 /* note also: 92 * on NV28 the following ranges could be reset (confirmed): 93 * $00400000 upto/incl. $004002fc; 94 * $00400400 upto/incl. $004017fc; 95 * $0040180c upto/incl. $00401948; 96 * $00401994 upto/incl. $00401a80; 97 * $00401a94 upto/incl. $00401ffc. 98 * The intermediate ranges hang the engine upon resetting. 
*/ 99 if (si->ps.card_arch < NV20A) 100 { 101 /* actively reset the PGRAPH registerset (acceleration engine) */ 102 for (cnt = 0x00400000; cnt < 0x00402000; cnt +=4) 103 { 104 NV_REG32(cnt) = 0x00000000; 105 } 106 } 107 108 /* setup PTIMER: */ 109 LOG(4,("ACC_DMA: timer numerator $%08x, denominator $%08x\n", ACCR(PT_NUMERATOR), ACCR(PT_DENOMINATR))); 110 111 /* The NV28 BIOS programs PTIMER like this (see coldstarting in nv_info.c) */ 112 //ACCW(PT_NUMERATOR, (si->ps.std_engine_clock * 20)); 113 //ACCW(PT_DENOMINATR, 0x00000271); 114 /* Nouveau (march 2009) mentions something like: writing 8 and 3 to these regs breaks the timings 115 * on the LVDS hardware sequencing microcode. A correct solution involves calculations with the GPU PLL. */ 116 117 /* For now use BIOS pre-programmed values if there */ 118 if (!ACCR(PT_NUMERATOR) || !ACCR(PT_DENOMINATR)) { 119 /* set timer numerator to 8 (in b0-15) */ 120 ACCW(PT_NUMERATOR, 0x00000008); 121 /* set timer denominator to 3 (in b0-15) */ 122 ACCW(PT_DENOMINATR, 0x00000003); 123 } 124 125 /* disable timer-alarm INT requests (b0) */ 126 ACCW(PT_INTEN, 0x00000000); 127 /* reset timer-alarm INT status bit (b0) */ 128 ACCW(PT_INTSTAT, 0xffffffff); 129 130 /* enable PRAMIN write access on pre NV10 before programming it! 
*/ 131 if (si->ps.card_arch == NV04A) 132 { 133 /* set framebuffer config: type = notiling, PRAMIN write access enabled */ 134 NV_REG32(NV32_PFB_CONFIG_0) = 0x00001114; 135 } 136 else 137 { 138 /* setup acc engine 'source' tile adressranges */ 139 if ((si->ps.card_type <= NV40) || (si->ps.card_type == NV45)) 140 { 141 ACCW(NV10_FBTIL0AD, 0); 142 ACCW(NV10_FBTIL1AD, 0); 143 ACCW(NV10_FBTIL2AD, 0); 144 ACCW(NV10_FBTIL3AD, 0); 145 ACCW(NV10_FBTIL4AD, 0); 146 ACCW(NV10_FBTIL5AD, 0); 147 ACCW(NV10_FBTIL6AD, 0); 148 ACCW(NV10_FBTIL7AD, 0); 149 ACCW(NV10_FBTIL0ED, (si->ps.memory_size - 1)); 150 ACCW(NV10_FBTIL1ED, (si->ps.memory_size - 1)); 151 ACCW(NV10_FBTIL2ED, (si->ps.memory_size - 1)); 152 ACCW(NV10_FBTIL3ED, (si->ps.memory_size - 1)); 153 ACCW(NV10_FBTIL4ED, (si->ps.memory_size - 1)); 154 ACCW(NV10_FBTIL5ED, (si->ps.memory_size - 1)); 155 ACCW(NV10_FBTIL6ED, (si->ps.memory_size - 1)); 156 ACCW(NV10_FBTIL7ED, (si->ps.memory_size - 1)); 157 } 158 else 159 { 160 /* NV41, 43, 44, G70 and up */ 161 ACCW(NV41_FBTIL0AD, 0); 162 ACCW(NV41_FBTIL1AD, 0); 163 ACCW(NV41_FBTIL2AD, 0); 164 ACCW(NV41_FBTIL3AD, 0); 165 ACCW(NV41_FBTIL4AD, 0); 166 ACCW(NV41_FBTIL5AD, 0); 167 ACCW(NV41_FBTIL6AD, 0); 168 ACCW(NV41_FBTIL7AD, 0); 169 ACCW(NV41_FBTIL8AD, 0); 170 ACCW(NV41_FBTIL9AD, 0); 171 ACCW(NV41_FBTILAAD, 0); 172 ACCW(NV41_FBTILBAD, 0); 173 ACCW(NV41_FBTIL0ED, (si->ps.memory_size - 1)); 174 ACCW(NV41_FBTIL1ED, (si->ps.memory_size - 1)); 175 ACCW(NV41_FBTIL2ED, (si->ps.memory_size - 1)); 176 ACCW(NV41_FBTIL3ED, (si->ps.memory_size - 1)); 177 ACCW(NV41_FBTIL4ED, (si->ps.memory_size - 1)); 178 ACCW(NV41_FBTIL5ED, (si->ps.memory_size - 1)); 179 ACCW(NV41_FBTIL6ED, (si->ps.memory_size - 1)); 180 ACCW(NV41_FBTIL7ED, (si->ps.memory_size - 1)); 181 ACCW(NV41_FBTIL8ED, (si->ps.memory_size - 1)); 182 ACCW(NV41_FBTIL9ED, (si->ps.memory_size - 1)); 183 ACCW(NV41_FBTILAED, (si->ps.memory_size - 1)); 184 ACCW(NV41_FBTILBED, (si->ps.memory_size - 1)); 185 186 if (si->ps.card_type >= G70) 187 { 188 
ACCW(G70_FBTILCAD, 0); 189 ACCW(G70_FBTILDAD, 0); 190 ACCW(G70_FBTILEAD, 0); 191 ACCW(G70_FBTILCED, (si->ps.memory_size - 1)); 192 ACCW(G70_FBTILDED, (si->ps.memory_size - 1)); 193 ACCW(G70_FBTILEED, (si->ps.memory_size - 1)); 194 } 195 } 196 } 197 198 /*** PRAMIN ***/ 199 /* first clear the entire RAMHT (hash-table) space to a defined state. It turns 200 * out at least NV11 will keep the previously programmed handles over resets and 201 * power-outages upto about 15 seconds!! Faulty entries might well hang the 202 * engine (confirmed on NV11). 203 * Note: 204 * this behaviour is not very strange: even very old DRAM chips are known to be 205 * able to do this, even though you should refresh them every few milliseconds or 206 * so. (Large memory cell capacitors, though different cells vary a lot in their 207 * capacity.) 208 * Of course data validity is not certain by a long shot over this large 209 * amount of time.. */ 210 for(cnt = 0; cnt < 0x0400; cnt++) 211 NV_REG32(NVACC_HT_HANDL_00 + (cnt << 2)) = 0; 212 /* RAMHT (hash-table) space SETUP FIFO HANDLES */ 213 /* note: 214 * 'instance' tells you where the engine command is stored in 'PR_CTXx_x' sets 215 * below: instance being b4-19 with baseadress NV_PRAMIN_CTX_0 (0x00700000). 216 * That command is linked to the handle noted here. This handle is then used to 217 * tell the FIFO to which engine command it is connected! 218 * (CTX registers are actually a sort of RAM space.) 
*/ 219 if (si->ps.card_arch >= NV40A) 220 { 221 /* (first set) */ 222 ACCW(HT_HANDL_00, (0x80000000 | NV10_CONTEXT_SURFACES_2D)); /* 32bit handle (not used) */ 223 ACCW(HT_VALUE_00, 0x0010114c); /* instance $114c, engine = acc engine, CHID = $00 */ 224 225 ACCW(HT_HANDL_01, (0x80000000 | NV_IMAGE_BLIT)); /* 32bit handle */ 226 ACCW(HT_VALUE_01, 0x00101148); /* instance $1148, engine = acc engine, CHID = $00 */ 227 228 ACCW(HT_HANDL_02, (0x80000000 | NV4_GDI_RECTANGLE_TEXT)); /* 32bit handle */ 229 ACCW(HT_VALUE_02, 0x0010114a); /* instance $114a, engine = acc engine, CHID = $00 */ 230 231 /* (second set) */ 232 ACCW(HT_HANDL_10, (0x80000000 | NV_ROP5_SOLID)); /* 32bit handle */ 233 ACCW(HT_VALUE_10, 0x00101142); /* instance $1142, engine = acc engine, CHID = $00 */ 234 235 ACCW(HT_HANDL_11, (0x80000000 | NV_IMAGE_BLACK_RECTANGLE)); /* 32bit handle */ 236 ACCW(HT_VALUE_11, 0x00101144); /* instance $1144, engine = acc engine, CHID = $00 */ 237 238 ACCW(HT_HANDL_12, (0x80000000 | NV_IMAGE_PATTERN)); /* 32bit handle */ 239 ACCW(HT_VALUE_12, 0x00101146); /* instance $1146, engine = acc engine, CHID = $00 */ 240 241 ACCW(HT_HANDL_13, (0x80000000 | NV_SCALED_IMAGE_FROM_MEMORY)); /* 32bit handle */ 242 ACCW(HT_VALUE_13, 0x0010114e); /* instance $114e, engine = acc engine, CHID = $00 */ 243 } 244 else 245 { 246 /* (first set) */ 247 ACCW(HT_HANDL_00, (0x80000000 | NV4_SURFACE)); /* 32bit handle */ 248 ACCW(HT_VALUE_00, 0x80011145); /* instance $1145, engine = acc engine, CHID = $00 */ 249 250 ACCW(HT_HANDL_01, (0x80000000 | NV_IMAGE_BLIT)); /* 32bit handle */ 251 ACCW(HT_VALUE_01, 0x80011146); /* instance $1146, engine = acc engine, CHID = $00 */ 252 253 ACCW(HT_HANDL_02, (0x80000000 | NV4_GDI_RECTANGLE_TEXT)); /* 32bit handle */ 254 ACCW(HT_VALUE_02, 0x80011147); /* instance $1147, engine = acc engine, CHID = $00 */ 255 256 ACCW(HT_HANDL_03, (0x80000000 | NV4_CONTEXT_SURFACES_ARGB_ZS)); /* 32bit handle (3D) */ 257 ACCW(HT_VALUE_03, 0x80011148); /* instance $1148, engine = 
acc engine, CHID = $00 */ 258 259 /* NV4_ and NV10_DX5_TEXTURE_TRIANGLE should be identical */ 260 ACCW(HT_HANDL_04, (0x80000000 | NV4_DX5_TEXTURE_TRIANGLE)); /* 32bit handle (3D) */ 261 ACCW(HT_VALUE_04, 0x80011149); /* instance $1149, engine = acc engine, CHID = $00 */ 262 263 /* NV4_ and NV10_DX6_MULTI_TEXTURE_TRIANGLE should be identical */ 264 ACCW(HT_HANDL_05, (0x80000000 | NV4_DX6_MULTI_TEXTURE_TRIANGLE)); /* 32bit handle (not used) */ 265 ACCW(HT_VALUE_05, 0x8001114a); /* instance $114a, engine = acc engine, CHID = $00 */ 266 267 ACCW(HT_HANDL_06, (0x80000000 | NV1_RENDER_SOLID_LIN)); /* 32bit handle (not used) */ 268 ACCW(HT_VALUE_06, 0x8001114c); /* instance $114c, engine = acc engine, CHID = $00 */ 269 270 /* (second set) */ 271 ACCW(HT_HANDL_10, (0x80000000 | NV_ROP5_SOLID)); /* 32bit handle */ 272 ACCW(HT_VALUE_10, 0x80011142); /* instance $1142, engine = acc engine, CHID = $00 */ 273 274 ACCW(HT_HANDL_11, (0x80000000 | NV_IMAGE_BLACK_RECTANGLE)); /* 32bit handle */ 275 ACCW(HT_VALUE_11, 0x80011143); /* instance $1143, engine = acc engine, CHID = $00 */ 276 277 ACCW(HT_HANDL_12, (0x80000000 | NV_IMAGE_PATTERN)); /* 32bit handle */ 278 ACCW(HT_VALUE_12, 0x80011144); /* instance $1144, engine = acc engine, CHID = $00 */ 279 280 ACCW(HT_HANDL_13, (0x80000000 | NV_SCALED_IMAGE_FROM_MEMORY)); /* 32bit handle */ 281 ACCW(HT_VALUE_13, 0x8001114b); /* instance $114b, engine = acc engine, CHID = $00 */ 282 283 //2007 3D tests.. 284 if (si->ps.card_type == NV15) 285 { 286 ACCW(HT_HANDL_14, (0x80000000 | NV_TCL_PRIMITIVE_3D)); /* 32bit handle */ 287 ACCW(HT_VALUE_14, 0x8001114d); /* instance $114d, engine = acc engine, CHID = $00 */ 288 } 289 290 } 291 292 /* program CTX registers: CTX1 is mostly done later (colorspace dependant) */ 293 /* note: 294 * CTX determines which HT handles point to what engine commands. */ 295 /* note also: 296 * CTX registers are in fact in the same GPU internal RAM space as the engine's 297 * hashtable. 
This means that stuff programmed in here also survives resets and 298 * power-outages! (confirmed NV11) */ 299 if (si->ps.card_arch >= NV40A) 300 { 301 /* setup a DMA define for use by command defines below. */ 302 ACCW(PR_CTX0_R, 0x00003000); /* DMA page table present and of linear type; 303 * DMA target node is NVM (non-volatile memory?) 304 * (instead of doing PCI or AGP transfers) */ 305 ACCW(PR_CTX1_R, (si->ps.memory_size - 1)); /* DMA limit: size is all cardRAM */ 306 ACCW(PR_CTX2_R, ((0x00000000 & 0xfffff000) | 0x00000002)); 307 /* DMA access type is READ_AND_WRITE; 308 * memory starts at start of cardRAM (b12-31): 309 * It's adress needs to be at a 4kb boundary! */ 310 ACCW(PR_CTX3_R, 0x00000002); /* unknown (looks like this is rubbish/not needed?) */ 311 /* setup set '0' for cmd NV_ROP5_SOLID */ 312 ACCW(PR_CTX0_0, 0x02080043); /* NVclass $043, patchcfg ROP_AND, nv10+: little endian */ 313 ACCW(PR_CTX1_0, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 314 ACCW(PR_CTX2_0, 0x00000000); /* DMA0 and DMA1 instance invalid */ 315 ACCW(PR_CTX3_0, 0x00000000); /* method traps disabled */ 316 ACCW(PR_CTX0_1, 0x00000000); /* extra */ 317 ACCW(PR_CTX1_1, 0x00000000); /* extra */ 318 /* setup set '1' for cmd NV_IMAGE_BLACK_RECTANGLE */ 319 ACCW(PR_CTX0_2, 0x02080019); /* NVclass $019, patchcfg ROP_AND, nv10+: little endian */ 320 ACCW(PR_CTX1_2, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 321 ACCW(PR_CTX2_2, 0x00000000); /* DMA0 and DMA1 instance invalid */ 322 ACCW(PR_CTX3_2, 0x00000000); /* method traps disabled */ 323 ACCW(PR_CTX0_3, 0x00000000); /* extra */ 324 ACCW(PR_CTX1_3, 0x00000000); /* extra */ 325 /* setup set '2' for cmd NV_IMAGE_PATTERN */ 326 ACCW(PR_CTX0_4, 0x02080018); /* NVclass $018, patchcfg ROP_AND, nv10+: little endian */ 327 ACCW(PR_CTX1_4, 0x02000000); /* colorspace not set, notify instance is $0200 (b16-31) */ 328 ACCW(PR_CTX2_4, 0x00000000); /* DMA0 and DMA1 instance invalid */ 329 
ACCW(PR_CTX3_4, 0x00000000); /* method traps disabled */ 330 ACCW(PR_CTX0_5, 0x00000000); /* extra */ 331 ACCW(PR_CTX1_5, 0x00000000); /* extra */ 332 /* setup set '4' for cmd NV12_IMAGE_BLIT */ 333 ACCW(PR_CTX0_6, 0x0208009f); /* NVclass $09f, patchcfg ROP_AND, nv10+: little endian */ 334 ACCW(PR_CTX1_6, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 335 ACCW(PR_CTX2_6, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */ 336 ACCW(PR_CTX3_6, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */ 337 ACCW(PR_CTX0_7, 0x00000000); /* extra */ 338 ACCW(PR_CTX1_7, 0x00000000); /* extra */ 339 /* setup set '5' for cmd NV4_GDI_RECTANGLE_TEXT */ 340 ACCW(PR_CTX0_8, 0x0208004a); /* NVclass $04a, patchcfg ROP_AND, nv10+: little endian */ 341 ACCW(PR_CTX1_8, 0x02000000); /* colorspace not set, notify instance is $0200 (b16-31) */ 342 ACCW(PR_CTX2_8, 0x00000000); /* DMA0 and DMA1 instance invalid */ 343 ACCW(PR_CTX3_8, 0x00000000); /* method traps disabled */ 344 ACCW(PR_CTX0_9, 0x00000000); /* extra */ 345 ACCW(PR_CTX1_9, 0x00000000); /* extra */ 346 /* setup set '6' for cmd NV10_CONTEXT_SURFACES_2D */ 347 ACCW(PR_CTX0_A, 0x02080062); /* NVclass $062, nv10+: little endian */ 348 ACCW(PR_CTX1_A, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 349 ACCW(PR_CTX2_A, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */ 350 ACCW(PR_CTX3_A, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */ 351 ACCW(PR_CTX0_B, 0x00000000); /* extra */ 352 ACCW(PR_CTX1_B, 0x00000000); /* extra */ 353 /* setup set '7' for cmd NV_SCALED_IMAGE_FROM_MEMORY */ 354 ACCW(PR_CTX0_C, 0x02080077); /* NVclass $077, nv10+: little endian */ 355 ACCW(PR_CTX1_C, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 356 ACCW(PR_CTX2_C, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */ 357 ACCW(PR_CTX3_C, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */ 358 ACCW(PR_CTX0_D, 0x00000000); /* 
extra */ 359 ACCW(PR_CTX1_D, 0x00000000); /* extra */ 360 /* setup DMA set pointed at by PF_CACH1_DMAI */ 361 ACCW(PR_CTX0_E, 0x00003002); /* DMA page table present and of linear type; 362 * DMA class is $002 (b0-11); 363 * DMA target node is NVM (non-volatile memory?) 364 * (instead of doing PCI or AGP transfers) */ 365 ACCW(PR_CTX1_E, 0x00007fff); /* DMA limit: tablesize is 32k bytes */ 366 ACCW(PR_CTX2_E, (((si->ps.memory_size - 1) & 0xffff8000) | 0x00000002)); 367 /* DMA access type is READ_AND_WRITE; 368 * table is located at end of cardRAM (b12-31): 369 * It's adress needs to be at a 4kb boundary! */ 370 } 371 else 372 { 373 /* setup a DMA define for use by command defines below. */ 374 ACCW(PR_CTX0_R, 0x00003000); /* DMA page table present and of linear type; 375 * DMA target node is NVM (non-volatile memory?) 376 * (instead of doing PCI or AGP transfers) */ 377 ACCW(PR_CTX1_R, (si->ps.memory_size - 1)); /* DMA limit: size is all cardRAM */ 378 ACCW(PR_CTX2_R, ((0x00000000 & 0xfffff000) | 0x00000002)); 379 /* DMA access type is READ_AND_WRITE; 380 * memory starts at start of cardRAM (b12-31): 381 * It's adress needs to be at a 4kb boundary! */ 382 ACCW(PR_CTX3_R, 0x00000002); /* unknown (looks like this is rubbish/not needed?) 
*/ 383 /* setup set '0' for cmd NV_ROP5_SOLID */ 384 ACCW(PR_CTX0_0, 0x01008043); /* NVclass $043, patchcfg ROP_AND, nv10+: little endian */ 385 ACCW(PR_CTX1_0, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 386 ACCW(PR_CTX2_0, 0x00000000); /* DMA0 and DMA1 instance invalid */ 387 ACCW(PR_CTX3_0, 0x00000000); /* method traps disabled */ 388 /* setup set '1' for cmd NV_IMAGE_BLACK_RECTANGLE */ 389 ACCW(PR_CTX0_1, 0x01008019); /* NVclass $019, patchcfg ROP_AND, nv10+: little endian */ 390 ACCW(PR_CTX1_1, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 391 ACCW(PR_CTX2_1, 0x00000000); /* DMA0 and DMA1 instance invalid */ 392 ACCW(PR_CTX3_1, 0x00000000); /* method traps disabled */ 393 /* setup set '2' for cmd NV_IMAGE_PATTERN */ 394 ACCW(PR_CTX0_2, 0x01008018); /* NVclass $018, patchcfg ROP_AND, nv10+: little endian */ 395 ACCW(PR_CTX1_2, 0x00000002); /* colorspace not set, notify instance is $0200 (b16-31) */ 396 ACCW(PR_CTX2_2, 0x00000000); /* DMA0 and DMA1 instance invalid */ 397 ACCW(PR_CTX3_2, 0x00000000); /* method traps disabled */ 398 /* setup set '3' for ... */ 399 if(si->ps.card_arch >= NV10A) 400 { 401 /* ... cmd NV10_CONTEXT_SURFACES_2D */ 402 ACCW(PR_CTX0_3, 0x01008062); /* NVclass $062, nv10+: little endian */ 403 } 404 else 405 { 406 /* ... cmd NV4_SURFACE */ 407 ACCW(PR_CTX0_3, 0x01008042); /* NVclass $042, nv10+: little endian */ 408 } 409 ACCW(PR_CTX1_3, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 410 ACCW(PR_CTX2_3, 0x11401140); /* DMA0 instance is $1140, DMA1 instance invalid */ 411 ACCW(PR_CTX3_3, 0x00000000); /* method trap 0 is $1140, trap 1 disabled */ 412 /* setup set '4' for ... */ 413 if (si->ps.card_type >= NV11) 414 { 415 /* ... cmd NV12_IMAGE_BLIT */ 416 ACCW(PR_CTX0_4, 0x0100809f); /* NVclass $09f, patchcfg ROP_AND, nv10+: little endian */ 417 } 418 else 419 { 420 /* ... 
cmd NV_IMAGE_BLIT */ 421 ACCW(PR_CTX0_4, 0x0100805f); /* NVclass $05f, patchcfg ROP_AND, nv10+: little endian */ 422 } 423 ACCW(PR_CTX1_4, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 424 ACCW(PR_CTX2_4, 0x11401140); /* DMA0 instance is $1140, DMA1 instance invalid */ 425 ACCW(PR_CTX3_4, 0x00000000); /* method trap 0 is $1140, trap 1 disabled */ 426 /* setup set '5' for cmd NV4_GDI_RECTANGLE_TEXT */ 427 ACCW(PR_CTX0_5, 0x0100804a); /* NVclass $04a, patchcfg ROP_AND, nv10+: little endian */ 428 ACCW(PR_CTX1_5, 0x00000002); /* colorspace not set, notify instance is $0200 (b16-31) */ 429 ACCW(PR_CTX2_5, 0x00000000); /* DMA0 and DMA1 instance invalid */ 430 ACCW(PR_CTX3_5, 0x00000000); /* method traps disabled */ 431 /* setup set '6' ... */ 432 if (si->ps.card_arch >= NV10A) 433 { 434 /* ... for cmd NV10_CONTEXT_SURFACES_ARGB_ZS */ 435 ACCW(PR_CTX0_6, 0x00000093); /* NVclass $093, nv10+: little endian */ 436 } 437 else 438 { 439 /* ... for cmd NV4_CONTEXT_SURFACES_ARGB_ZS */ 440 ACCW(PR_CTX0_6, 0x00000053); /* NVclass $053, nv10+: little endian */ 441 } 442 ACCW(PR_CTX1_6, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 443 ACCW(PR_CTX2_6, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 444 ACCW(PR_CTX3_6, 0x00000000); /* method traps disabled */ 445 /* setup set '7' ... */ 446 if (si->ps.card_arch >= NV10A) 447 { 448 /* ... for cmd NV10_DX5_TEXTURE_TRIANGLE */ 449 ACCW(PR_CTX0_7, 0x0300a094); /* NVclass $094, patchcfg ROP_AND, userclip enable, 450 * context surface0 valid, nv10+: little endian */ 451 } 452 else 453 { 454 /* ... 
for cmd NV4_DX5_TEXTURE_TRIANGLE */ 455 ACCW(PR_CTX0_7, 0x0300a054); /* NVclass $054, patchcfg ROP_AND, userclip enable, 456 * context surface0 valid */ 457 } 458 ACCW(PR_CTX1_7, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 459 ACCW(PR_CTX2_7, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 460 ACCW(PR_CTX3_7, 0x00000000); /* method traps disabled */ 461 /* setup set '8' ... */ 462 if (si->ps.card_arch >= NV10A) 463 { 464 /* ... for cmd NV10_DX6_MULTI_TEXTURE_TRIANGLE (not used) */ 465 ACCW(PR_CTX0_8, 0x0300a095); /* NVclass $095, patchcfg ROP_AND, userclip enable, 466 * context surface0 valid, nv10+: little endian */ 467 } 468 else 469 { 470 /* ... for cmd NV4_DX6_MULTI_TEXTURE_TRIANGLE (not used) */ 471 ACCW(PR_CTX0_8, 0x0300a055); /* NVclass $055, patchcfg ROP_AND, userclip enable, 472 * context surface0 valid */ 473 } 474 ACCW(PR_CTX1_8, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 475 ACCW(PR_CTX2_8, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 476 ACCW(PR_CTX3_8, 0x00000000); /* method traps disabled */ 477 /* setup set '9' for cmd NV_SCALED_IMAGE_FROM_MEMORY */ 478 ACCW(PR_CTX0_9, 0x01018077); /* NVclass $077, patchcfg SRC_COPY, 479 * context surface0 valid, nv10+: little endian */ 480 ACCW(PR_CTX1_9, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 481 ACCW(PR_CTX2_9, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 482 ACCW(PR_CTX3_9, 0x00000000); /* method traps disabled */ 483 /* setup set 'A' for cmd NV1_RENDER_SOLID_LIN (not used) */ 484 ACCW(PR_CTX0_A, 0x0300a01c); /* NVclass $01c, patchcfg ROP_AND, userclip enable, 485 * context surface0 valid, nv10+: little endian */ 486 ACCW(PR_CTX1_A, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 487 ACCW(PR_CTX2_A, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 488 ACCW(PR_CTX3_A, 0x00000000); /* method traps disabled */ 489 //2007 3D tests.. 490 /* setup set 'B' ... 
*/ 491 if (si->ps.card_type == NV15) 492 { 493 /* ... for cmd NV11_TCL_PRIMITIVE_3D */ 494 ACCW(PR_CTX0_B, 0x0300a096); /* NVclass $096, patchcfg ROP_AND, userclip enable, 495 * context surface0 valid, nv10+: little endian */ 496 ACCW(PR_CTX1_B, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 497 ACCW(PR_CTX2_B, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 498 ACCW(PR_CTX3_B, 0x00000000); /* method traps disabled */ 499 } 500 /* setup DMA set pointed at by PF_CACH1_DMAI */ 501 if (si->engine.agp_mode) 502 { 503 /* DMA page table present and of linear type; 504 * DMA class is $002 (b0-11); 505 * DMA target node is AGP */ 506 ACCW(PR_CTX0_C, 0x00033002); 507 } 508 else 509 { 510 /* DMA page table present and of linear type; 511 * DMA class is $002 (b0-11); 512 * DMA target node is PCI */ 513 ACCW(PR_CTX0_C, 0x00023002); 514 } 515 ACCW(PR_CTX1_C, 0x000fffff); /* DMA limit: tablesize is 1M bytes */ 516 ACCW(PR_CTX2_C, (((uint32)((uint8 *)(si->dma_buffer_pci))) | 0x00000002)); 517 /* DMA access type is READ_AND_WRITE; 518 * table is located in main system RAM (b12-31): 519 * It's adress needs to be at a 4kb boundary! */ 520 521 /* set the 3D rendering functions colordepth via BPIXEL's 'depth 2' */ 522 /* note: 523 * setting a depth to 'invalid' (zero) makes the engine report 524 * ready with drawing 'immediately'. */ 525 //fixme: NV30A and above (probably) needs to be corrected... 
526 switch(si->dm.space) 527 { 528 case B_CMAP8: 529 if (si->ps.card_arch < NV30A) 530 /* set depth 2: $1 = Y8 */ 531 ACCW(BPIXEL, 0x00000100); 532 else 533 /* set depth 0-1: $1 = Y8, $2 = X1R5G5B5_Z1R5G5B5 */ 534 ACCW(BPIXEL, 0x00000021); 535 break; 536 case B_RGB15_LITTLE: 537 if (si->ps.card_arch < NV30A) 538 /* set depth 2: $4 = A1R5G5B5 */ 539 ACCW(BPIXEL, 0x00000400); 540 else 541 /* set depth 0-1: $2 = X1R5G5B5_Z1R5G5B5, $4 = A1R5G5B5 */ 542 ACCW(BPIXEL, 0x00000042); 543 break; 544 case B_RGB16_LITTLE: 545 if (si->ps.card_arch < NV30A) 546 /* set depth 2: $5 = R5G6B5 */ 547 ACCW(BPIXEL, 0x00000500); 548 else 549 /* set depth 0-1: $5 = R5G6B5, $a = X1A7R8G8B8_O1A7R8G8B8 */ 550 ACCW(BPIXEL, 0x000000a5); 551 break; 552 case B_RGB32_LITTLE: 553 case B_RGBA32_LITTLE: 554 if (si->ps.card_arch < NV30A) 555 /* set depth 2: $c = A8R8G8B8 */ 556 ACCW(BPIXEL, 0x00000c00); 557 else 558 /* set depth 0-1: $7 = X8R8G8B8_Z8R8G8B8, $e = V8YB8U8YA8 */ 559 ACCW(BPIXEL, 0x000000e7); 560 break; 561 default: 562 LOG(8,("ACC: init, invalid bit depth\n")); 563 return B_ERROR; 564 } 565 } 566 567 if (si->ps.card_arch == NV04A) 568 { 569 /* do a explicit engine reset */ 570 ACCW(DEBUG0, 0x000001ff); 571 572 /* init some function blocks */ 573 /* DEBUG0, b20 and b21 should be high, this has a big influence on 574 * 3D rendering speed! 
(on all cards, confirmed) */ 575 ACCW(DEBUG0, 0x1230c000); 576 /* DEBUG1, b19 = 1 increases 3D rendering speed on TNT2 (M64) a bit, 577 * TNT1 rendering speed stays the same (all cards confirmed) */ 578 ACCW(DEBUG1, 0x72191101); 579 ACCW(DEBUG2, 0x11d5f071); 580 ACCW(DEBUG3, 0x0004ff31); 581 /* init OP methods */ 582 ACCW(DEBUG3, 0x4004ff31); 583 584 /* disable all acceleration engine INT reguests */ 585 ACCW(ACC_INTE, 0x00000000); 586 /* reset all acceration engine INT status bits */ 587 ACCW(ACC_INTS, 0xffffffff); 588 /* context control enabled */ 589 ACCW(NV04_CTX_CTRL, 0x10010100); 590 /* all acceleration buffers, pitches and colors are valid */ 591 ACCW(NV04_ACC_STAT, 0xffffffff); 592 /* enable acceleration engine command FIFO */ 593 ACCW(FIFO_EN, 0x00000001); 594 595 /* setup location of active screen in framebuffer */ 596 ACCW(OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 597 ACCW(OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 598 /* setup accesible card memory range */ 599 ACCW(BLIMIT0, (si->ps.memory_size - 1)); 600 ACCW(BLIMIT1, (si->ps.memory_size - 1)); 601 602 /* pattern shape value = 8x8, 2 color */ 603 //fixme: not needed, unless the engine has a hardware fault (setting via cmd)! 
604 //ACCW(PAT_SHP, 0x00000000); 605 /* Pgraph Beta AND value (fraction) b23-30 */ 606 ACCW(BETA_AND_VAL, 0xffffffff); 607 } 608 else 609 { 610 /* do a explicit engine reset */ 611 ACCW(DEBUG0, 0xffffffff); 612 ACCW(DEBUG0, 0x00000000); 613 /* disable all acceleration engine INT reguests */ 614 ACCW(ACC_INTE, 0x00000000); 615 /* reset all acceration engine INT status bits */ 616 ACCW(ACC_INTS, 0xffffffff); 617 /* context control enabled */ 618 ACCW(NV10_CTX_CTRL, 0x10010100); 619 /* all acceleration buffers, pitches and colors are valid */ 620 ACCW(NV10_ACC_STAT, 0xffffffff); 621 /* enable acceleration engine command FIFO */ 622 ACCW(FIFO_EN, 0x00000001); 623 /* setup surface type: 624 * b1-0 = %01 = surface type is non-swizzle; 625 * this is needed to enable 3D on NV1x (confirmed) and maybe others? */ 626 ACCW(NV10_SURF_TYP, ((ACCR(NV10_SURF_TYP)) & 0x0007ff00)); 627 ACCW(NV10_SURF_TYP, ((ACCR(NV10_SURF_TYP)) | 0x00020101)); 628 } 629 630 if (si->ps.card_arch == NV10A) 631 { 632 /* init some function blocks */ 633 ACCW(DEBUG1, 0x00118700); 634 /* DEBUG2 has a big influence on 3D speed for NV11 and NV15 635 * (confirmed b3 and b18 should both be '1' on both cards!) 636 * (b16 should also be '1', increases 3D speed on NV11 a bit more) */ 637 ACCW(DEBUG2, 0x24fd2ad9); 638 ACCW(DEBUG3, 0x55de0030); 639 /* NV10_DEBUG4 has a big influence on 3D speed for NV11, NV15 and NV18 640 * (confirmed b14 and b15 should both be '1' on these cards!) 641 * (confirmed b8 should be '0' on NV18 to prevent complete engine crash!) 
*/ 642 ACCW(NV10_DEBUG4, 0x0000c000); 643 644 /* copy tile setup stuff from 'source' to acc engine */ 645 for (cnt = 0; cnt < 32; cnt++) 646 { 647 NV_REG32(NVACC_NV10_TIL0AD + (cnt << 2)) = 648 NV_REG32(NVACC_NV10_FBTIL0AD + (cnt << 2)); 649 } 650 651 /* setup location of active screen in framebuffer */ 652 ACCW(OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 653 ACCW(OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 654 /* setup accesible card memory range */ 655 ACCW(BLIMIT0, (si->ps.memory_size - 1)); 656 ACCW(BLIMIT1, (si->ps.memory_size - 1)); 657 658 /* pattern shape value = 8x8, 2 color */ 659 //fixme: not needed, unless the engine has a hardware fault (setting via cmd)! 660 //ACCW(PAT_SHP, 0x00000000); 661 /* Pgraph Beta AND value (fraction) b23-30 */ 662 ACCW(BETA_AND_VAL, 0xffffffff); 663 } 664 665 if (si->ps.card_arch >= NV20A) 666 { 667 switch (si->ps.card_arch) 668 { 669 case NV40A: 670 /* init some function blocks */ 671 ACCW(DEBUG1, 0x401287c0); 672 ACCW(DEBUG3, 0x60de8051); 673 /* disable specific functions, but enable SETUP_SPARE2 register */ 674 ACCW(NV10_DEBUG4, 0x00008000); 675 /* set limit_viol_pix_adress(?): more likely something unknown.. */ 676 ACCW(NV25_WHAT0, 0x00be3c5f); 677 678 /* setup some unknown serially accessed registers (?) */ 679 tmp = (NV_REG32(NV32_NV4X_WHAT0) & 0x000000ff); 680 for (cnt = 0; (tmp && !(tmp & 0x00000001)); tmp >>= 1, cnt++); 681 { 682 ACCW(NV4X_WHAT2, cnt); 683 } 684 685 /* unknown.. 
*/ 686 switch (si->ps.card_type) 687 { 688 case NV40: 689 case NV45: 690 /* and NV48: but these are pgm'd as NV45 currently */ 691 ACCW(NV40_WHAT0, 0x83280fff); 692 ACCW(NV40_WHAT1, 0x000000a0); 693 ACCW(NV40_WHAT2, 0x0078e366); 694 ACCW(NV40_WHAT3, 0x0000014c); 695 break; 696 case NV41: 697 /* and ID == 0x012x: but no cards defined yet */ 698 ACCW(NV40P_WHAT0, 0x83280eff); 699 ACCW(NV40P_WHAT1, 0x000000a0); 700 ACCW(NV40P_WHAT2, 0x007596ff); 701 ACCW(NV40P_WHAT3, 0x00000108); 702 break; 703 case NV43: 704 ACCW(NV40P_WHAT0, 0x83280eff); 705 ACCW(NV40P_WHAT1, 0x000000a0); 706 ACCW(NV40P_WHAT2, 0x0072cb77); 707 ACCW(NV40P_WHAT3, 0x00000108); 708 break; 709 case NV44: 710 case G72: 711 ACCW(NV40P_WHAT0, 0x83280eff); 712 ACCW(NV40P_WHAT1, 0x000000a0); 713 714 NV_REG32(NV32_NV44_WHAT10) = NV_REG32(NV32_NV10STRAPINFO); 715 NV_REG32(NV32_NV44_WHAT11) = 0x00000000; 716 NV_REG32(NV32_NV44_WHAT12) = 0x00000000; 717 NV_REG32(NV32_NV44_WHAT13) = NV_REG32(NV32_NV10STRAPINFO); 718 719 ACCW(NV44_WHAT2, 0x00000000); 720 ACCW(NV44_WHAT3, 0x00000000); 721 break; 722 /* case NV44 type 2: (cardID 0x022x) 723 //fixme if needed: doesn't seem to need the strapinfo thing.. 724 ACCW(NV40P_WHAT0, 0x83280eff); 725 ACCW(NV40P_WHAT1, 0x000000a0); 726 727 ACCW(NV44_WHAT2, 0x00000000); 728 ACCW(NV44_WHAT3, 0x00000000); 729 break; 730 */ case G70: 731 case G71: 732 case G73: 733 ACCW(NV40P_WHAT0, 0x83280eff); 734 ACCW(NV40P_WHAT1, 0x000000a0); 735 ACCW(NV40P_WHAT2, 0x07830610); 736 ACCW(NV40P_WHAT3, 0x0000016a); 737 break; 738 default: 739 ACCW(NV40P_WHAT0, 0x83280eff); 740 ACCW(NV40P_WHAT1, 0x000000a0); 741 break; 742 } 743 744 ACCW(NV10_TIL3PT, 0x2ffff800); 745 ACCW(NV10_TIL3ST, 0x00006000); 746 ACCW(NV4X_WHAT1, 0x01000000); 747 /* engine data source DMA instance = $1140 */ 748 ACCW(NV4X_DMA_SRC, 0x00001140); 749 break; 750 case NV30A: 751 /* init some function blocks, but most is unknown.. 
*/ 752 ACCW(DEBUG1, 0x40108700); 753 ACCW(NV25_WHAT1, 0x00140000); 754 ACCW(DEBUG3, 0xf00e0431); 755 ACCW(NV10_DEBUG4, 0x00008000); 756 ACCW(NV25_WHAT0, 0xf04b1f36); 757 ACCW(NV20_WHAT3, 0x1002d888); 758 ACCW(NV25_WHAT2, 0x62ff007f); 759 break; 760 case NV20A: 761 /* init some function blocks, but most is unknown.. */ 762 ACCW(DEBUG1, 0x00118700); 763 ACCW(DEBUG3, 0xf20e0431); 764 ACCW(NV10_DEBUG4, 0x00000000); 765 ACCW(NV20_WHAT1, 0x00000040); 766 if (si->ps.card_type < NV25) 767 { 768 ACCW(NV20_WHAT2, 0x00080000); 769 ACCW(NV10_DEBUG5, 0x00000005); 770 ACCW(NV20_WHAT3, 0x45caa208); 771 ACCW(NV20_WHAT4, 0x24000000); 772 ACCW(NV20_WHAT5, 0x00000040); 773 774 /* copy some fixed RAM(?) configuration info(?) to some indexed registers: */ 775 /* b16-24 is select; b2-13 is adress in 32-bit words */ 776 ACCW(RDI_INDEX, 0x00e00038); 777 /* data is 32-bit */ 778 ACCW(RDI_DATA, 0x00000030); 779 /* copy some fixed RAM(?) configuration info(?) to some indexed registers: */ 780 /* b16-24 is select; b2-13 is adress in 32-bit words */ 781 ACCW(RDI_INDEX, 0x00e10038); 782 /* data is 32-bit */ 783 ACCW(RDI_DATA, 0x00000030); 784 } 785 else 786 { 787 ACCW(NV25_WHAT1, 0x00080000); 788 ACCW(NV25_WHAT0, 0x304b1fb6); 789 ACCW(NV20_WHAT3, 0x18b82880); 790 ACCW(NV20_WHAT4, 0x44000000); 791 ACCW(NV20_WHAT5, 0x40000080); 792 ACCW(NV25_WHAT2, 0x000000ff); 793 } 794 break; 795 } 796 797 /* NV20A, NV30A and NV40A: */ 798 /* copy tile setup stuff from previous setup 'source' to acc engine 799 * (pattern colorRAM?) 
*/ 800 if ((si->ps.card_type <= NV40) || (si->ps.card_type == NV45)) 801 { 802 for (cnt = 0; cnt < 32; cnt++) 803 { 804 /* copy NV10_FBTIL0AD upto/including NV10_FBTIL7ST */ 805 NV_REG32(NVACC_NV20_WHAT0 + (cnt << 2)) = 806 NV_REG32(NVACC_NV10_FBTIL0AD + (cnt << 2)); 807 808 /* copy NV10_FBTIL0AD upto/including NV10_FBTIL7ST */ 809 NV_REG32(NVACC_NV20_2_WHAT0 + (cnt << 2)) = 810 NV_REG32(NVACC_NV10_FBTIL0AD + (cnt << 2)); 811 } 812 } 813 else 814 { 815 /* NV41, 43, 44, G70 and later */ 816 if (si->ps.card_type >= G70) 817 { 818 for (cnt = 0; cnt < 60; cnt++) 819 { 820 /* copy NV41_FBTIL0AD upto/including G70_FBTILEST */ 821 NV_REG32(NVACC_NV41_WHAT0 + (cnt << 2)) = 822 NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2)); 823 824 /* copy NV41_FBTIL0AD upto/including G70_FBTILEST */ 825 NV_REG32(NVACC_NV20_2_WHAT0 + (cnt << 2)) = 826 NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2)); 827 } 828 } 829 else 830 { 831 /* NV41, 43, 44 */ 832 for (cnt = 0; cnt < 48; cnt++) 833 { 834 /* copy NV41_FBTIL0AD upto/including NV41_FBTILBST */ 835 NV_REG32(NVACC_NV20_WHAT0 + (cnt << 2)) = 836 NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2)); 837 838 if (si->ps.card_type != NV44) 839 { 840 /* copy NV41_FBTIL0AD upto/including NV41_FBTILBST */ 841 NV_REG32(NVACC_NV20_2_WHAT0 + (cnt << 2)) = 842 NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2)); 843 } 844 } 845 } 846 } 847 848 if (si->ps.card_arch >= NV40A) 849 { 850 if ((si->ps.card_type == NV40) || (si->ps.card_type == NV45)) 851 { 852 /* copy some RAM configuration info(?) 
*/ 853 ACCW(NV20_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); 854 ACCW(NV20_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); 855 ACCW(NV40_WHAT_T2, NV_REG32(NV32_PFB_CONFIG_0)); 856 ACCW(NV40_WHAT_T3, NV_REG32(NV32_PFB_CONFIG_1)); 857 858 /* setup location of active screen in framebuffer */ 859 ACCW(NV20_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 860 ACCW(NV20_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 861 /* setup accesible card memory range */ 862 ACCW(NV20_BLIMIT6, (si->ps.memory_size - 1)); 863 ACCW(NV20_BLIMIT7, (si->ps.memory_size - 1)); 864 } 865 else 866 { 867 /* NV41, 43, 44, G70 and later */ 868 869 /* copy some RAM configuration info(?) */ 870 if (si->ps.card_type >= G70) 871 { 872 ACCW(G70_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); 873 ACCW(G70_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); 874 } 875 else 876 { 877 /* NV41, 43, 44 */ 878 ACCW(NV40P_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); 879 ACCW(NV40P_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); 880 } 881 ACCW(NV40P_WHAT_T2, NV_REG32(NV32_PFB_CONFIG_0)); 882 ACCW(NV40P_WHAT_T3, NV_REG32(NV32_PFB_CONFIG_1)); 883 884 /* setup location of active screen in framebuffer */ 885 ACCW(NV40P_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 886 ACCW(NV40P_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 887 /* setup accesible card memory range */ 888 ACCW(NV40P_BLIMIT6, (si->ps.memory_size - 1)); 889 ACCW(NV40P_BLIMIT7, (si->ps.memory_size - 1)); 890 } 891 } 892 else /* NV20A and NV30A: */ 893 { 894 /* copy some RAM configuration info(?) */ 895 ACCW(NV20_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); 896 ACCW(NV20_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); 897 /* copy some RAM configuration info(?) 
to some indexed registers: */ 898 /* b16-24 is select; b2-13 is adress in 32-bit words */ 899 ACCW(RDI_INDEX, 0x00ea0000); 900 /* data is 32-bit */ 901 ACCW(RDI_DATA, NV_REG32(NV32_PFB_CONFIG_0)); 902 /* b16-24 is select; b2-13 is adress in 32-bit words */ 903 ACCW(RDI_INDEX, 0x00ea0004); 904 /* data is 32-bit */ 905 ACCW(RDI_DATA, NV_REG32(NV32_PFB_CONFIG_1)); 906 907 /* setup location of active screen in framebuffer */ 908 ACCW(NV20_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 909 ACCW(NV20_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 910 /* setup accesible card memory range */ 911 ACCW(NV20_BLIMIT6, (si->ps.memory_size - 1)); 912 ACCW(NV20_BLIMIT7, (si->ps.memory_size - 1)); 913 } 914 915 /* NV20A, NV30A and NV40A: */ 916 /* setup some acc engine tile stuff */ 917 ACCW(NV10_TIL2AD, 0x00000000); 918 ACCW(NV10_TIL0ED, 0xffffffff); 919 } 920 921 /* all cards: */ 922 /* setup clipping: rect size is 32768 x 32768, probably max. setting */ 923 /* note: 924 * can also be done via the NV_IMAGE_BLACK_RECTANGLE engine command. */ 925 ACCW(ABS_UCLP_XMIN, 0x00000000); 926 ACCW(ABS_UCLP_YMIN, 0x00000000); 927 ACCW(ABS_UCLP_XMAX, 0x00007fff); 928 ACCW(ABS_UCLP_YMAX, 0x00007fff); 929 930 /* setup sync parameters for NV12_IMAGE_BLIT command for the current mode: 931 * values given are CRTC vertical counter limit values. 
The NV12 command will wait 932 * for the specified's CRTC's vertical counter to be in between the given values */ 933 if (si->ps.card_type >= NV11) 934 { 935 ACCW(NV11_CRTC_LO, si->dm.timing.v_display - 1); 936 ACCW(NV11_CRTC_HI, si->dm.timing.v_display + 1); 937 } 938 939 /*** PFIFO ***/ 940 /* (setup caches) */ 941 /* disable caches reassign */ 942 ACCW(PF_CACHES, 0x00000000); 943 /* PFIFO mode: channel 0 is in DMA mode, channels 1 - 32 are in PIO mode */ 944 ACCW(PF_MODE, 0x00000001); 945 /* cache1 push0 access disabled */ 946 ACCW(PF_CACH1_PSH0, 0x00000000); 947 /* cache1 pull0 access disabled */ 948 ACCW(PF_CACH1_PUL0, 0x00000000); 949 /* cache1 push1 mode = DMA */ 950 if (si->ps.card_arch >= NV40A) 951 ACCW(PF_CACH1_PSH1, 0x00010000); 952 else 953 ACCW(PF_CACH1_PSH1, 0x00000100); 954 /* cache1 DMA Put offset = 0 (b2-28) */ 955 ACCW(PF_CACH1_DMAP, 0x00000000); 956 /* cache1 DMA Get offset = 0 (b2-28) */ 957 ACCW(PF_CACH1_DMAG, 0x00000000); 958 /* cache1 DMA instance adress = $114e (b0-15); 959 * instance being b4-19 with baseadress NV_PRAMIN_CTX_0 (0x00700000). */ 960 /* note: 961 * should point to a DMA definition in CTX register space (which is sort of RAM). 962 * This define tells the engine where the DMA cmd buffer is and what it's size is. 963 * Inside that cmd buffer you'll find the actual issued engine commands. */ 964 if (si->ps.card_arch >= NV40A) 965 ACCW(PF_CACH1_DMAI, 0x00001150); 966 else 967 //2007 3d test.. 968 ACCW(PF_CACH1_DMAI, 0x0000114e); 969 /* cache0 push0 access disabled */ 970 ACCW(PF_CACH0_PSH0, 0x00000000); 971 /* cache0 pull0 access disabled */ 972 ACCW(PF_CACH0_PUL0, 0x00000000); 973 /* RAM HT (hash table) baseadress = $10000 (b4-8), size = 4k, 974 * search = 128 (is byte offset between hash 'sets') */ 975 /* note: 976 * so HT base is $00710000, last is $00710fff. 
977 * In this space you define the engine command handles (HT_HANDL_XX), which 978 * in turn points to the defines in CTX register space (which is sort of RAM) */ 979 ACCW(PF_RAMHT, 0x03000100); 980 /* RAM FC baseadress = $11000 (b3-8) (size is fixed to 0.5k(?)) */ 981 /* note: 982 * so FC base is $00711000, last is $007111ff. (not used?) */ 983 ACCW(PF_RAMFC, 0x00000110); 984 /* RAM RO baseadress = $11200 (b1-8), size = 0.5k */ 985 /* note: 986 * so RO base is $00711200, last is $007113ff. (not used?) */ 987 /* note also: 988 * This means(?) the PRAMIN CTX registers are accessible from base $00711400. */ 989 ACCW(PF_RAMRO, 0x00000112); 990 /* PFIFO size: ch0-15 = 512 bytes, ch16-31 = 124 bytes */ 991 ACCW(PF_SIZE, 0x0000ffff); 992 /* cache1 hash instance = $ffff (b0-15) */ 993 ACCW(PF_CACH1_HASH, 0x0000ffff); 994 /* disable all PFIFO INTs */ 995 ACCW(PF_INTEN, 0x00000000); 996 /* reset all PFIFO INT status bits */ 997 ACCW(PF_INTSTAT, 0xffffffff); 998 /* cache0 pull0 engine = acceleration engine (graphics) */ 999 ACCW(PF_CACH0_PUL1, 0x00000001); 1000 /* cache1 DMA control: disable some stuff */ 1001 ACCW(PF_CACH1_DMAC, 0x00000000); 1002 /* cache1 engine 0 upto/including 7 is software (could also be graphics or DVD) */ 1003 ACCW(PF_CACH1_ENG, 0x00000000); 1004 /* cache1 DMA fetch: trigger at 128 bytes, size is 32 bytes, max requests is 15, 1005 * use little endian */ 1006 ACCW(PF_CACH1_DMAF, 0x000f0078); 1007 /* cache1 DMA push: b0 = 1: access is enabled */ 1008 ACCW(PF_CACH1_DMAS, 0x00000001); 1009 /* cache1 push0 access enabled */ 1010 ACCW(PF_CACH1_PSH0, 0x00000001); 1011 /* cache1 pull0 access enabled */ 1012 ACCW(PF_CACH1_PUL0, 0x00000001); 1013 /* cache1 pull1 engine = acceleration engine (graphics) */ 1014 ACCW(PF_CACH1_PUL1, 0x00000001); 1015 /* enable PFIFO caches reassign */ 1016 ACCW(PF_CACHES, 0x00000001); 1017 1018 /* setup 3D specifics */ 1019 nv_init_for_3D_dma(); 1020 1021 /*** init acceleration engine command info ***/ 1022 /* set object handles */ 
1023 /* note: 1024 * probably depending on some other setup, there are 8 or 32 FIFO channels 1025 * available. Assuming the current setup only has 8 channels because the 'rest' 1026 * isn't setup here... */ 1027 si->engine.fifo.handle[0] = NV_ROP5_SOLID; 1028 si->engine.fifo.handle[1] = NV_IMAGE_BLACK_RECTANGLE; 1029 si->engine.fifo.handle[2] = NV_IMAGE_PATTERN; 1030 si->engine.fifo.handle[3] = NV4_SURFACE; /* NV10_CONTEXT_SURFACES_2D is identical */ 1031 si->engine.fifo.handle[4] = NV_IMAGE_BLIT; 1032 si->engine.fifo.handle[5] = NV4_GDI_RECTANGLE_TEXT; 1033 si->engine.fifo.handle[6] = NV4_CONTEXT_SURFACES_ARGB_ZS;//NV1_RENDER_SOLID_LIN; 1034 si->engine.fifo.handle[7] = NV4_DX5_TEXTURE_TRIANGLE; 1035 /* preset no FIFO channels assigned to cmd's */ 1036 for (cnt = 0; cnt < 0x20; cnt++) 1037 { 1038 si->engine.fifo.ch_ptr[cnt] = 0; 1039 } 1040 /* set handle's pointers to their assigned FIFO channels */ 1041 /* note: 1042 * b0-1 aren't used as adressbits. Using b0 to indicate a valid pointer. */ 1043 for (cnt = 0; cnt < 0x08; cnt++) 1044 { 1045 si->engine.fifo.ch_ptr[(si->engine.fifo.handle[cnt])] = 1046 (0x00000001 + (cnt * 0x00002000)); 1047 } 1048 1049 /*** init DMA command buffer info ***/ 1050 if (si->ps.card_arch >= NV40A) //main mem DMA buf on pre-NV40 1051 { 1052 si->dma_buffer = (void *)((char *)si->framebuffer + 1053 ((si->ps.memory_size - 1) & 0xffff8000)); 1054 } 1055 LOG(4,("ACC_DMA: command buffer is at adress $%08x\n", 1056 ((uint32)(si->dma_buffer)))); 1057 /* we have issued no DMA cmd's to the engine yet */ 1058 si->engine.dma.put = 0; 1059 /* the current first free adress in the DMA buffer is at offset 0 */ 1060 si->engine.dma.current = 0; 1061 /* the DMA buffer can hold 8k 32-bit words (it's 32kb in size), 1062 * or 256k 32-bit words (1Mb in size) dependant on architecture (for now) */ 1063 /* note: 1064 * one word is reserved at the end of the DMA buffer to be able to instruct the 1065 * engine to do a buffer wrap-around! 
1066 * (DMA opcode 'noninc method': issue word $20000000.) */ 1067 if (si->ps.card_arch < NV40A) 1068 si->engine.dma.max = ((1 * 1024 * 1024) >> 2) - 1; 1069 else 1070 si->engine.dma.max = 8192 - 1; 1071 /* note the current free space we have left in the DMA buffer */ 1072 si->engine.dma.free = si->engine.dma.max - si->engine.dma.current; 1073 1074 /*** init FIFO via DMA command buffer. ***/ 1075 /* wait for room in fifo for new FIFO assigment cmds if needed: */ 1076 if (si->ps.card_arch >= NV40A) 1077 { 1078 if (nv_acc_fifofree_dma(12) != B_OK) return B_ERROR; 1079 } 1080 else 1081 { 1082 if (nv_acc_fifofree_dma(16) != B_OK) return B_ERROR; 1083 } 1084 1085 /* program new FIFO assignments */ 1086 /* Raster OPeration: */ 1087 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH0, si->engine.fifo.handle[0]); 1088 /* Clip: */ 1089 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH1, si->engine.fifo.handle[1]); 1090 /* Pattern: */ 1091 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH2, si->engine.fifo.handle[2]); 1092 /* 2D Surfaces: */ 1093 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH3, si->engine.fifo.handle[3]); 1094 /* Blit: */ 1095 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH4, si->engine.fifo.handle[4]); 1096 /* Bitmap: */ 1097 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH5, si->engine.fifo.handle[5]); 1098 if (si->ps.card_arch < NV40A) 1099 { 1100 /* 3D surfaces: (3D related only) */ 1101 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH6, si->engine.fifo.handle[6]); 1102 /* Textured Triangle: (3D only) */ 1103 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH7, si->engine.fifo.handle[7]); 1104 } 1105 1106 /*** Set pixel width ***/ 1107 switch(si->dm.space) 1108 { 1109 case B_CMAP8: 1110 surf_depth = 0x00000001; 1111 cmd_depth = 0x00000003; 1112 break; 1113 case B_RGB15_LITTLE: 1114 case B_RGB16_LITTLE: 1115 surf_depth = 0x00000004; 1116 cmd_depth = 0x00000001; 1117 break; 1118 case B_RGB32_LITTLE: 1119 case B_RGBA32_LITTLE: 1120 surf_depth = 0x00000006; 1121 cmd_depth = 0x00000003; 1122 break; 1123 default: 1124 LOG(8,("ACC_DMA: init, invalid bit 
depth\n")); 1125 return B_ERROR; 1126 } 1127 1128 /* wait for room in fifo for surface setup cmd if needed */ 1129 if (nv_acc_fifofree_dma(5) != B_OK) return B_ERROR; 1130 /* now setup 2D surface (writing 5 32bit words) */ 1131 nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 4); 1132 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = surf_depth; /* Format */ 1133 /* setup screen pitch */ 1134 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1135 ((si->fbc.bytes_per_row & 0x0000ffff) | (si->fbc.bytes_per_row << 16)); /* Pitch */ 1136 /* setup screen location */ 1137 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1138 ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer); /* OffsetSource */ 1139 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1140 ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer); /* OffsetDest */ 1141 1142 /* wait for room in fifo for pattern colordepth setup cmd if needed */ 1143 if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR; 1144 /* set pattern colordepth (writing 2 32bit words) */ 1145 nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETCOLORFORMAT, 1); 1146 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 1147 1148 /* wait for room in fifo for bitmap colordepth setup cmd if needed */ 1149 if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR; 1150 /* set bitmap colordepth (writing 2 32bit words) */ 1151 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_SETCOLORFORMAT, 1); 1152 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 1153 1154 /* Load our pattern into the engine: */ 1155 /* wait for room in fifo for pattern cmd if needed. 
*/ 1156 if (nv_acc_fifofree_dma(7) != B_OK) return B_ERROR; 1157 /* now setup pattern (writing 7 32bit words) */ 1158 nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETSHAPE, 1); 1159 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000000; /* SetShape: 0 = 8x8, 1 = 64x1, 2 = 1x64 */ 1160 nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETCOLOR0, 4); 1161 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetColor0 */ 1162 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetColor1 */ 1163 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetPattern[0] */ 1164 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetPattern[1] */ 1165 1166 /* tell the engine to fetch and execute all (new) commands in the DMA buffer */ 1167 nv_start_dma(); 1168 1169 return B_OK; 1170 } 1171 1172 static void nv_init_for_3D_dma(void) 1173 { 1174 /* setup PGRAPH unknown registers and modify (pre-cleared) pipe stuff for 3D use */ 1175 if (si->ps.card_arch >= NV10A) 1176 { 1177 /* setup unknown PGRAPH stuff */ 1178 ACCW(PGWHAT_00, 0x00000000); 1179 ACCW(PGWHAT_01, 0x00000000); 1180 ACCW(PGWHAT_02, 0x00000000); 1181 ACCW(PGWHAT_03, 0x00000000); 1182 1183 ACCW(PGWHAT_04, 0x00001000); 1184 ACCW(PGWHAT_05, 0x00001000); 1185 ACCW(PGWHAT_06, 0x4003ff80); 1186 1187 ACCW(PGWHAT_07, 0x00000000); 1188 ACCW(PGWHAT_08, 0x00000000); 1189 ACCW(PGWHAT_09, 0x00000000); 1190 ACCW(PGWHAT_0A, 0x00000000); 1191 ACCW(PGWHAT_0B, 0x00000000); 1192 1193 ACCW(PGWHAT_0C, 0x00080008); 1194 ACCW(PGWHAT_0D, 0x00080008); 1195 1196 ACCW(PGWHAT_0E, 0x00000000); 1197 ACCW(PGWHAT_0F, 0x00000000); 1198 ACCW(PGWHAT_10, 0x00000000); 1199 ACCW(PGWHAT_11, 0x00000000); 1200 ACCW(PGWHAT_12, 0x00000000); 1201 ACCW(PGWHAT_13, 0x00000000); 1202 ACCW(PGWHAT_14, 0x00000000); 1203 ACCW(PGWHAT_15, 0x00000000); 1204 ACCW(PGWHAT_16, 0x00000000); 1205 ACCW(PGWHAT_17, 0x00000000); 1206 ACCW(PGWHAT_18, 0x00000000); 1207 1208 ACCW(PGWHAT_19, 
0x10000000); 1209 1210 ACCW(PGWHAT_1A, 0x00000000); 1211 ACCW(PGWHAT_1B, 0x00000000); 1212 ACCW(PGWHAT_1C, 0x00000000); 1213 ACCW(PGWHAT_1D, 0x00000000); 1214 ACCW(PGWHAT_1E, 0x00000000); 1215 ACCW(PGWHAT_1F, 0x00000000); 1216 ACCW(PGWHAT_20, 0x00000000); 1217 ACCW(PGWHAT_21, 0x00000000); 1218 1219 ACCW(PGWHAT_22, 0x08000000); 1220 1221 ACCW(PGWHAT_23, 0x00000000); 1222 ACCW(PGWHAT_24, 0x00000000); 1223 ACCW(PGWHAT_25, 0x00000000); 1224 ACCW(PGWHAT_26, 0x00000000); 1225 1226 ACCW(PGWHAT_27, 0x4b7fffff); 1227 1228 ACCW(PGWHAT_28, 0x00000000); 1229 ACCW(PGWHAT_29, 0x00000000); 1230 ACCW(PGWHAT_2A, 0x00000000); 1231 1232 /* setup window clipping */ 1233 /* b0-11 = min; b16-27 = max. 1234 * note: 1235 * probably two's complement values, so setting to max range here: 1236 * which would be -2048 upto/including +2047. */ 1237 /* horizontal */ 1238 ACCW(WINCLIP_H_0, 0x07ff0800); 1239 ACCW(WINCLIP_H_1, 0x07ff0800); 1240 ACCW(WINCLIP_H_2, 0x07ff0800); 1241 ACCW(WINCLIP_H_3, 0x07ff0800); 1242 ACCW(WINCLIP_H_4, 0x07ff0800); 1243 ACCW(WINCLIP_H_5, 0x07ff0800); 1244 ACCW(WINCLIP_H_6, 0x07ff0800); 1245 ACCW(WINCLIP_H_7, 0x07ff0800); 1246 /* vertical */ 1247 ACCW(WINCLIP_V_0, 0x07ff0800); 1248 ACCW(WINCLIP_V_1, 0x07ff0800); 1249 ACCW(WINCLIP_V_2, 0x07ff0800); 1250 ACCW(WINCLIP_V_3, 0x07ff0800); 1251 ACCW(WINCLIP_V_4, 0x07ff0800); 1252 ACCW(WINCLIP_V_5, 0x07ff0800); 1253 ACCW(WINCLIP_V_6, 0x07ff0800); 1254 ACCW(WINCLIP_V_7, 0x07ff0800); 1255 1256 /* setup (initialize) pipe: 1257 * needed to get valid 3D rendering on (at least) NV1x cards. Without this 1258 * those cards produce rubbish instead of 3D, although the engine itself keeps 1259 * running and 2D stays OK. */ 1260 1261 /* set eyetype to local, lightning etc. is off */ 1262 ACCW(NV10_XFMOD0, 0x10000000); 1263 /* disable all lights */ 1264 ACCW(NV10_XFMOD1, 0x00000000); 1265 1266 /* note: upon writing data into the PIPEDAT register, the PIPEADR is 1267 * probably auto-incremented! 
*/ 1268 /* (pipe adress = b2-16, pipe data = b0-31) */ 1269 /* note: pipe adresses IGRAPH registers! */ 1270 ACCW(NV10_PIPEADR, 0x00006740); 1271 ACCW(NV10_PIPEDAT, 0x00000000); 1272 ACCW(NV10_PIPEDAT, 0x00000000); 1273 ACCW(NV10_PIPEDAT, 0x00000000); 1274 ACCW(NV10_PIPEDAT, 0x3f800000); 1275 1276 ACCW(NV10_PIPEADR, 0x00006750); 1277 ACCW(NV10_PIPEDAT, 0x40000000); 1278 ACCW(NV10_PIPEDAT, 0x40000000); 1279 ACCW(NV10_PIPEDAT, 0x40000000); 1280 ACCW(NV10_PIPEDAT, 0x40000000); 1281 1282 ACCW(NV10_PIPEADR, 0x00006760); 1283 ACCW(NV10_PIPEDAT, 0x00000000); 1284 ACCW(NV10_PIPEDAT, 0x00000000); 1285 ACCW(NV10_PIPEDAT, 0x3f800000); 1286 ACCW(NV10_PIPEDAT, 0x00000000); 1287 1288 ACCW(NV10_PIPEADR, 0x00006770); 1289 ACCW(NV10_PIPEDAT, 0xc5000000); 1290 ACCW(NV10_PIPEDAT, 0xc5000000); 1291 ACCW(NV10_PIPEDAT, 0x00000000); 1292 ACCW(NV10_PIPEDAT, 0x00000000); 1293 1294 ACCW(NV10_PIPEADR, 0x00006780); 1295 ACCW(NV10_PIPEDAT, 0x00000000); 1296 ACCW(NV10_PIPEDAT, 0x00000000); 1297 ACCW(NV10_PIPEDAT, 0x3f800000); 1298 ACCW(NV10_PIPEDAT, 0x00000000); 1299 1300 ACCW(NV10_PIPEADR, 0x000067a0); 1301 ACCW(NV10_PIPEDAT, 0x3f800000); 1302 ACCW(NV10_PIPEDAT, 0x3f800000); 1303 ACCW(NV10_PIPEDAT, 0x3f800000); 1304 ACCW(NV10_PIPEDAT, 0x3f800000); 1305 1306 ACCW(NV10_PIPEADR, 0x00006ab0); 1307 ACCW(NV10_PIPEDAT, 0x3f800000); 1308 ACCW(NV10_PIPEDAT, 0x3f800000); 1309 ACCW(NV10_PIPEDAT, 0x3f800000); 1310 1311 ACCW(NV10_PIPEADR, 0x00006ac0); 1312 ACCW(NV10_PIPEDAT, 0x00000000); 1313 ACCW(NV10_PIPEDAT, 0x00000000); 1314 ACCW(NV10_PIPEDAT, 0x00000000); 1315 1316 ACCW(NV10_PIPEADR, 0x00006c10); 1317 ACCW(NV10_PIPEDAT, 0xbf800000); 1318 1319 ACCW(NV10_PIPEADR, 0x00007030); 1320 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1321 1322 ACCW(NV10_PIPEADR, 0x00007040); 1323 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1324 1325 ACCW(NV10_PIPEADR, 0x00007050); 1326 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1327 1328 ACCW(NV10_PIPEADR, 0x00007060); 1329 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1330 1331 ACCW(NV10_PIPEADR, 0x00007070); 1332 
ACCW(NV10_PIPEDAT, 0x7149f2ca); 1333 1334 ACCW(NV10_PIPEADR, 0x00007080); 1335 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1336 1337 ACCW(NV10_PIPEADR, 0x00007090); 1338 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1339 1340 ACCW(NV10_PIPEADR, 0x000070a0); 1341 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1342 1343 ACCW(NV10_PIPEADR, 0x00006a80); 1344 ACCW(NV10_PIPEDAT, 0x00000000); 1345 ACCW(NV10_PIPEDAT, 0x00000000); 1346 ACCW(NV10_PIPEDAT, 0x3f800000); 1347 1348 ACCW(NV10_PIPEADR, 0x00006aa0); 1349 ACCW(NV10_PIPEDAT, 0x00000000); 1350 ACCW(NV10_PIPEDAT, 0x00000000); 1351 ACCW(NV10_PIPEDAT, 0x00000000); 1352 1353 /* select primitive type that will be drawn (tri's) */ 1354 ACCW(NV10_PIPEADR, 0x00000040); 1355 ACCW(NV10_PIPEDAT, 0x00000005); 1356 1357 ACCW(NV10_PIPEADR, 0x00006400); 1358 ACCW(NV10_PIPEDAT, 0x3f800000); 1359 ACCW(NV10_PIPEDAT, 0x3f800000); 1360 ACCW(NV10_PIPEDAT, 0x4b7fffff); 1361 ACCW(NV10_PIPEDAT, 0x00000000); 1362 1363 ACCW(NV10_PIPEADR, 0x00006410); 1364 ACCW(NV10_PIPEDAT, 0xc5000000); 1365 ACCW(NV10_PIPEDAT, 0xc5000000); 1366 ACCW(NV10_PIPEDAT, 0x00000000); 1367 ACCW(NV10_PIPEDAT, 0x00000000); 1368 1369 ACCW(NV10_PIPEADR, 0x00006420); 1370 ACCW(NV10_PIPEDAT, 0x00000000); 1371 ACCW(NV10_PIPEDAT, 0x00000000); 1372 ACCW(NV10_PIPEDAT, 0x00000000); 1373 ACCW(NV10_PIPEDAT, 0x00000000); 1374 1375 ACCW(NV10_PIPEADR, 0x00006430); 1376 ACCW(NV10_PIPEDAT, 0x00000000); 1377 ACCW(NV10_PIPEDAT, 0x00000000); 1378 ACCW(NV10_PIPEDAT, 0x00000000); 1379 ACCW(NV10_PIPEDAT, 0x00000000); 1380 1381 ACCW(NV10_PIPEADR, 0x000064c0); 1382 ACCW(NV10_PIPEDAT, 0x3f800000); 1383 ACCW(NV10_PIPEDAT, 0x3f800000); 1384 ACCW(NV10_PIPEDAT, 0x477fffff); 1385 ACCW(NV10_PIPEDAT, 0x3f800000); 1386 1387 ACCW(NV10_PIPEADR, 0x000064d0); 1388 ACCW(NV10_PIPEDAT, 0xc5000000); 1389 ACCW(NV10_PIPEDAT, 0xc5000000); 1390 ACCW(NV10_PIPEDAT, 0x00000000); 1391 ACCW(NV10_PIPEDAT, 0x00000000); 1392 1393 ACCW(NV10_PIPEADR, 0x000064e0); 1394 ACCW(NV10_PIPEDAT, 0xc4fff000); 1395 ACCW(NV10_PIPEDAT, 0xc4fff000); 1396 ACCW(NV10_PIPEDAT, 
0x00000000); 1397 ACCW(NV10_PIPEDAT, 0x00000000); 1398 1399 ACCW(NV10_PIPEADR, 0x000064f0); 1400 ACCW(NV10_PIPEDAT, 0x00000000); 1401 ACCW(NV10_PIPEDAT, 0x00000000); 1402 ACCW(NV10_PIPEDAT, 0x00000000); 1403 ACCW(NV10_PIPEDAT, 0x00000000); 1404 1405 /* turn lightning on */ 1406 ACCW(NV10_XFMOD0, 0x30000000); 1407 /* set light 1 to infinite type, other lights remain off */ 1408 ACCW(NV10_XFMOD1, 0x00000004); 1409 1410 /* Z-buffer state is: 1411 * initialized, set to: 'fixed point' (integer?); Z-buffer; 16bits depth */ 1412 /* note: 1413 * other options possible are: floating point; 24bits depth; W-buffer */ 1414 ACCW(GLOB_STAT_0, 0x10000000); 1415 /* set DMA instance 2 and 3 to be invalid */ 1416 ACCW(GLOB_STAT_1, 0x00000000); 1417 } 1418 } 1419 1420 static void nv_start_dma(void) 1421 { 1422 uint32 dummy; 1423 1424 if (si->engine.dma.current != si->engine.dma.put) 1425 { 1426 si->engine.dma.put = si->engine.dma.current; 1427 /* flush used caches so we know for sure the DMA cmd buffer received all data. */ 1428 if (si->ps.card_arch < NV40A) 1429 { 1430 /* some CPU's support out-of-order processing (WinChip/Cyrix). Flush them. */ 1431 __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory"); 1432 /* read a non-cached adress to flush the cash */ 1433 dummy = ACCR(STATUS); 1434 } 1435 else 1436 { 1437 /* dummy read the first adress of the framebuffer to flush MTRR-WC buffers */ 1438 dummy = *((volatile uint32 *)(si->framebuffer)); 1439 } 1440 1441 /* actually start DMA to execute all commands now in buffer */ 1442 /* note: 1443 * it doesn't matter which FIFO channel's DMA registers we access, they are in 1444 * fact all the same set. It also doesn't matter if the channel was assigned a 1445 * command or not. */ 1446 /* note also: 1447 * NV_GENERAL_DMAPUT is a write-only register on some cards (confirmed NV11). 
*/ 1448 NV_REG32(NVACC_FIFO + NV_GENERAL_DMAPUT) = (si->engine.dma.put << 2); 1449 } 1450 } 1451 1452 /* this routine does not check the engine's internal hardware FIFO, but the DMA 1453 * command buffer. You can see this as a FIFO as well, that feeds the hardware FIFO. 1454 * The hardware FIFO state is checked by the DMA hardware automatically. */ 1455 static status_t nv_acc_fifofree_dma(uint16 cmd_size) 1456 { 1457 uint32 dmaget; 1458 1459 /* we'd better check for timeouts on the DMA engine as it's theoretically 1460 * breakable by malfunctioning software */ 1461 uint16 cnt = 0; 1462 1463 /* check if the DMA buffer has enough room for the command. 1464 * note: 1465 * engine.dma.free is 'cached' */ 1466 while ((si->engine.dma.free < cmd_size) && (cnt < 10000) && (err < 3)) 1467 { 1468 /* see where the engine is currently fetching from the buffer */ 1469 /* note: 1470 * read this only once in the code as accessing registers is relatively slow */ 1471 /* note also: 1472 * it doesn't matter which FIFO channel's DMA registers we access, they are in 1473 * fact all the same set. It also doesn't matter if the channel was assigned a 1474 * command or not. */ 1475 dmaget = ((NV_REG32(NVACC_FIFO + NV_GENERAL_DMAGET)) >> 2); 1476 1477 /* update timeout counter: on NV11 on a Pentium4 2.8Ghz max reached count 1478 * using BeRoMeter 1.2.6 was about 600; so counting 10000 before generating 1479 * a timeout should definately do it. Snooze()-ing cannot be done without a 1480 * serious speed penalty, even if done for only 1 microSecond. */ 1481 cnt++; 1482 1483 /* where's the engine fetching viewed from us issuing? */ 1484 if (si->engine.dma.put >= dmaget) 1485 { 1486 /* engine is fetching 'behind us', the last piece of the buffer is free */ 1487 1488 /* note the 'updated' free space we have in the DMA buffer */ 1489 si->engine.dma.free = si->engine.dma.max - si->engine.dma.current; 1490 /* if it's enough after all we exit this routine immediately. 
Else: */ 1491 if (si->engine.dma.free < cmd_size) 1492 { 1493 /* not enough room left, so instruct DMA engine to reset the buffer 1494 * when it's reaching the end of it */ 1495 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x20000000; 1496 /* reset our buffer pointer, so new commands will be placed at the 1497 * beginning of the buffer. */ 1498 si->engine.dma.current = 0; 1499 /* tell the engine to fetch the remaining command(s) in the DMA buffer 1500 * that where not executed before. */ 1501 nv_start_dma(); 1502 1503 /* NOW the engine is fetching 'in front of us', so the first piece 1504 * of the buffer is free */ 1505 1506 /* note the updated current free space we have in the DMA buffer */ 1507 si->engine.dma.free = dmaget - si->engine.dma.current; 1508 /* mind this pittfall: 1509 * Leave some room between where the engine is fetching and where we 1510 * put new commands. Otherwise the engine will crash on heavy loads. 1511 * A crash can be forced best in 640x480x32 mode with BeRoMeter 1.2.6. 1512 * (confirmed on NV11 and NV43 with less than 256 words forced freespace.) 1513 * Note: 1514 * The engine is DMA triggered for fetching chunks every 128 bytes, 1515 * maybe this is the reason for this behaviour. 1516 * Note also: 1517 * it looks like the space that needs to be kept free is coupled 1518 * with the size of the DMA buffer. */ 1519 if (si->engine.dma.free < 256) 1520 si->engine.dma.free = 0; 1521 else 1522 si->engine.dma.free -= 256; 1523 } 1524 } 1525 else 1526 { 1527 /* engine is fetching 'in front of us', so the first piece of the buffer 1528 * is free */ 1529 1530 /* note the updated current free space we have in the DMA buffer */ 1531 si->engine.dma.free = dmaget - si->engine.dma.current; 1532 /* mind this pittfall: 1533 * Leave some room between where the engine is fetching and where we 1534 * put new commands. Otherwise the engine will crash on heavy loads. 1535 * A crash can be forced best in 640x480x32 mode with BeRoMeter 1.2.6. 
1536 * (confirmed on NV11 and NV43 with less than 256 words forced freespace.) 1537 * Note: 1538 * The engine is DMA triggered for fetching chunks every 128 bytes, 1539 * maybe this is the reason for this behaviour. 1540 * Note also: 1541 * it looks like the space that needs to be kept free is coupled 1542 * with the size of the DMA buffer. */ 1543 if (si->engine.dma.free < 256) 1544 si->engine.dma.free = 0; 1545 else 1546 si->engine.dma.free -= 256; 1547 } 1548 } 1549 1550 /* log timeout if we had one */ 1551 if (cnt == 10000) 1552 { 1553 if (err < 3) err++; 1554 LOG(4,("ACC_DMA: fifofree; DMA timeout #%d, engine trouble!\n", err)); 1555 } 1556 1557 /* we must make the acceleration routines abort or the driver will hang! */ 1558 if (err >= 3) return B_ERROR; 1559 1560 return B_OK; 1561 } 1562 1563 static void nv_acc_cmd_dma(uint32 cmd, uint16 offset, uint16 size) 1564 { 1565 /* NV_FIFO_DMA_OPCODE: set number of cmd words (b18 - 28); set FIFO offset for 1566 * first cmd word (b2 - 15); set DMA opcode = method (b29 - 31). 1567 * a 'NOP' is the opcode word $00000000. */ 1568 /* note: 1569 * possible DMA opcodes: 1570 * b'000' is 'method' (execute cmd); 1571 * b'001' is 'jump'; 1572 * b'002' is 'noninc method' (execute buffer wrap-around); 1573 * b'003' is 'call': return is executed by opcode word $00020000 (b17 = 1). */ 1574 /* note also: 1575 * this system uses auto-increments for the FIFO offset adresses. Make sure 1576 * to set a new adress if a gap exists between the previous one and the new one. 
*/ 1577 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = ((size << 18) | 1578 ((si->engine.fifo.ch_ptr[cmd] + offset) & 0x0000fffc)); 1579 1580 /* space left after issuing the current command is the cmd AND it's arguments less */ 1581 si->engine.dma.free -= (size + 1); 1582 } 1583 1584 static void nv_acc_set_ch_dma(uint16 ch, uint32 handle) 1585 { 1586 /* issue FIFO channel assign cmd */ 1587 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = ((1 << 18) | ch); 1588 /* set new assignment */ 1589 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = (0x80000000 | handle); 1590 1591 /* space left after issuing the current command is the cmd AND it's arguments less */ 1592 si->engine.dma.free -= 2; 1593 } 1594 1595 /* note: 1596 * switching fifo channel assignments this way has no noticable slowdown: 1597 * measured 0.2% with Quake2. */ 1598 void nv_acc_assert_fifo_dma(void) 1599 { 1600 /* does every engine cmd this accelerant needs have a FIFO channel? */ 1601 //fixme: can probably be optimized for both speed and channel selection... 
1602 if (!si->engine.fifo.ch_ptr[NV_ROP5_SOLID] || 1603 !si->engine.fifo.ch_ptr[NV_IMAGE_BLACK_RECTANGLE] || 1604 !si->engine.fifo.ch_ptr[NV_IMAGE_PATTERN] || 1605 !si->engine.fifo.ch_ptr[NV4_SURFACE] || 1606 !si->engine.fifo.ch_ptr[NV_IMAGE_BLIT] || 1607 !si->engine.fifo.ch_ptr[NV4_GDI_RECTANGLE_TEXT] || 1608 !si->engine.fifo.ch_ptr[NV_SCALED_IMAGE_FROM_MEMORY]) 1609 { 1610 uint16 cnt; 1611 1612 /* free the FIFO channels we want from the currently assigned cmd's */ 1613 si->engine.fifo.ch_ptr[si->engine.fifo.handle[0]] = 0; 1614 si->engine.fifo.ch_ptr[si->engine.fifo.handle[1]] = 0; 1615 si->engine.fifo.ch_ptr[si->engine.fifo.handle[2]] = 0; 1616 si->engine.fifo.ch_ptr[si->engine.fifo.handle[3]] = 0; 1617 si->engine.fifo.ch_ptr[si->engine.fifo.handle[4]] = 0; 1618 si->engine.fifo.ch_ptr[si->engine.fifo.handle[5]] = 0; 1619 si->engine.fifo.ch_ptr[si->engine.fifo.handle[6]] = 0; 1620 1621 /* set new object handles */ 1622 si->engine.fifo.handle[0] = NV_ROP5_SOLID; 1623 si->engine.fifo.handle[1] = NV_IMAGE_BLACK_RECTANGLE; 1624 si->engine.fifo.handle[2] = NV_IMAGE_PATTERN; 1625 si->engine.fifo.handle[3] = NV4_SURFACE; 1626 si->engine.fifo.handle[4] = NV_IMAGE_BLIT; 1627 si->engine.fifo.handle[5] = NV4_GDI_RECTANGLE_TEXT; 1628 si->engine.fifo.handle[6] = NV_SCALED_IMAGE_FROM_MEMORY; 1629 1630 /* set handle's pointers to their assigned FIFO channels */ 1631 /* note: 1632 * b0-1 aren't used as adressbits. Using b0 to indicate a valid pointer. */ 1633 for (cnt = 0; cnt < 0x08; cnt++) 1634 { 1635 si->engine.fifo.ch_ptr[(si->engine.fifo.handle[cnt])] = 1636 (0x00000001 + (cnt * 0x00002000)); 1637 } 1638 1639 /* wait for room in fifo for new FIFO assigment cmds if needed. 
*/ 1640 if (nv_acc_fifofree_dma(14) != B_OK) return; 1641 1642 /* program new FIFO assignments */ 1643 /* Raster OPeration: */ 1644 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH0, si->engine.fifo.handle[0]); 1645 /* Clip: */ 1646 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH1, si->engine.fifo.handle[1]); 1647 /* Pattern: */ 1648 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH2, si->engine.fifo.handle[2]); 1649 /* 2D Surface: */ 1650 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH3, si->engine.fifo.handle[3]); 1651 /* Blit: */ 1652 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH4, si->engine.fifo.handle[4]); 1653 /* Bitmap: */ 1654 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH5, si->engine.fifo.handle[5]); 1655 /* Scaled and fitered Blit: */ 1656 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH6, si->engine.fifo.handle[6]); 1657 1658 /* tell the engine to fetch and execute all (new) commands in the DMA buffer */ 1659 nv_start_dma(); 1660 } 1661 } 1662 1663 /* 1664 note: 1665 moved acceleration 'top-level' routines to be integrated in the engine: 1666 it is costly to call the engine for every single function within a loop! 1667 (measured with BeRoMeter 1.2.6: upto 15% speed increase on all CPU's.) 1668 1669 note also: 1670 splitting up each command list into sublists (see routines below) prevents 1671 a lot more nested calls, further increasing the speed with upto 70%. 1672 1673 finally: 1674 sending the sublist to just one single engine command even further increases 1675 speed with upto another 10%. This can't be done for blits though, as this engine- 1676 command's hardware does not support multiple objects. 1677 */ 1678 1679 /* screen to screen blit - i.e. move windows around and scroll within them. */ 1680 void SCREEN_TO_SCREEN_BLIT_DMA(engine_token *et, blit_params *list, uint32 count) 1681 { 1682 uint32 i = 0; 1683 uint16 subcnt; 1684 1685 /*** init acc engine for blit function ***/ 1686 /* ROP registers (Raster OPeration): 1687 * wait for room in fifo for ROP cmd if needed. 
*/ 1688 if (nv_acc_fifofree_dma(2) != B_OK) return; 1689 /* now setup ROP (writing 2 32bit words) for GXcopy */ 1690 nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); 1691 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xcc; /* SetRop5 */ 1692 1693 /*** do each blit ***/ 1694 /* Note: 1695 * blit-copy direction is determined inside nvidia hardware: no setup needed */ 1696 while (count) 1697 { 1698 /* break up the list in sublists to minimize calls, while making sure long 1699 * lists still get executed without trouble */ 1700 subcnt = 32; 1701 if (count < 32) subcnt = count; 1702 count -= subcnt; 1703 1704 /* wait for room in fifo for blit cmd if needed. */ 1705 if (nv_acc_fifofree_dma(4 * subcnt) != B_OK) return; 1706 1707 while (subcnt--) 1708 { 1709 /* now setup blit (writing 4 32bit words) */ 1710 nv_acc_cmd_dma(NV_IMAGE_BLIT, NV_IMAGE_BLIT_SOURCEORG, 3); 1711 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1712 (((list[i].src_top) << 16) | (list[i].src_left)); /* SourceOrg */ 1713 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1714 (((list[i].dest_top) << 16) | (list[i].dest_left)); /* DestOrg */ 1715 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1716 ((((list[i].height) + 1) << 16) | ((list[i].width) + 1)); /* HeightWidth */ 1717 1718 i++; 1719 } 1720 1721 /* tell the engine to fetch the commands in the DMA buffer that where not 1722 * executed before. */ 1723 nv_start_dma(); 1724 } 1725 1726 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 1727 si->engine.threeD.reload = 0xffffffff; 1728 } 1729 1730 /* scaled and filtered screen to screen blit - i.e. video playback without overlay */ 1731 /* note: source and destination may not overlap. */ 1732 //fixme? checkout NV5 and NV10 version of cmd: faster?? (or is 0x77 a 'autoselect' version?) 
1733 void SCREEN_TO_SCREEN_SCALED_FILTERED_BLIT_DMA(engine_token *et, scaled_blit_params *list, uint32 count) 1734 { 1735 uint32 i = 0; 1736 uint16 subcnt; 1737 uint32 cmd_depth; 1738 uint8 bpp; 1739 1740 /*** init acc engine for scaled filtered blit function ***/ 1741 /* Set pixel width */ 1742 switch(si->dm.space) 1743 { 1744 case B_RGB15_LITTLE: 1745 cmd_depth = 0x00000002; 1746 bpp = 2; 1747 break; 1748 case B_RGB16_LITTLE: 1749 cmd_depth = 0x00000007; 1750 bpp = 2; 1751 break; 1752 case B_RGB32_LITTLE: 1753 case B_RGBA32_LITTLE: 1754 cmd_depth = 0x00000004; 1755 bpp = 4; 1756 break; 1757 /* fixme sometime: 1758 * we could do the spaces below if this function would be modified to be able 1759 * to use a source outside of the desktop, i.e. using offscreen bitmaps... */ 1760 case B_YCbCr422: 1761 cmd_depth = 0x00000005; 1762 bpp = 2; 1763 break; 1764 case B_YUV422: 1765 cmd_depth = 0x00000006; 1766 bpp = 2; 1767 break; 1768 default: 1769 /* note: this function does not support src or dest in the B_CMAP8 space! */ 1770 //fixme: the NV10 version of this cmd supports B_CMAP8 src though... (checkout) 1771 LOG(8,("ACC_DMA: scaled_filtered_blit, invalid bit depth\n")); 1772 return; 1773 } 1774 1775 /* modify surface depth settings for 15-bit colorspace so command works as intended */ 1776 if (si->dm.space == B_RGB15_LITTLE) 1777 { 1778 /* wait for room in fifo for surface setup cmd if needed */ 1779 if (nv_acc_fifofree_dma(2) != B_OK) return; 1780 /* now setup 2D surface (writing 1 32bit word) */ 1781 nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 1); 1782 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000002; /* Format */ 1783 } 1784 1785 /* TNT1 has fixed operation mode 'SRCcopy' while the rest can be programmed: */ 1786 if (si->ps.card_type != NV04) 1787 { 1788 /* wait for room in fifo for cmds if needed. 
*/ 1789 if (nv_acc_fifofree_dma(5) != B_OK) return; 1790 /* now setup source bitmap colorspace */ 1791 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SETCOLORFORMAT, 2); 1792 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 1793 /* now setup operation mode to SRCcopy */ 1794 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000003; /* SetOperation */ 1795 } 1796 else 1797 { 1798 /* wait for room in fifo for cmd if needed. */ 1799 if (nv_acc_fifofree_dma(4) != B_OK) return; 1800 /* now setup source bitmap colorspace */ 1801 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SETCOLORFORMAT, 1); 1802 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 1803 /* TNT1 has fixed operation mode SRCcopy */ 1804 } 1805 /* now setup fill color (writing 2 32bit words) */ 1806 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); 1807 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000000; /* Color1A */ 1808 1809 /*** do each blit ***/ 1810 while (count) 1811 { 1812 /* break up the list in sublists to minimize calls, while making sure long 1813 * lists still get executed without trouble */ 1814 subcnt = 16; 1815 if (count < 16) subcnt = count; 1816 count -= subcnt; 1817 1818 /* wait for room in fifo for blit cmd if needed. 
*/ 1819 if (nv_acc_fifofree_dma(12 * subcnt) != B_OK) return; 1820 1821 while (subcnt--) 1822 { 1823 /* now setup blit (writing 12 32bit words) */ 1824 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCEORG, 6); 1825 /* setup dest clipping ref for blit (not used) (b0-15 = left, b16-31 = top) */ 1826 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0; /* SourceOrg */ 1827 /* setup dest clipping size for blit */ 1828 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1829 (((list[i].dest_height + 1) << 16) | (list[i].dest_width + 1)); /* SourceHeightWidth */ 1830 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1831 /* setup destination location and size for blit */ 1832 (((list[i].dest_top) << 16) | (list[i].dest_left)); /* DestOrg */ 1833 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1834 (((list[i].dest_height + 1) << 16) | (list[i].dest_width + 1)); /* DestHeightWidth */ 1835 //fixme: findout scaling limits... (although the current cmd interface doesn't support them.) 1836 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1837 (((list[i].src_width + 1) << 20) / (list[i].dest_width + 1)); /* HorInvScale (in 12.20 format) */ 1838 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1839 (((list[i].src_height + 1) << 20) / (list[i].dest_height + 1)); /* VerInvScale (in 12.20 format) */ 1840 1841 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCESIZE, 4); 1842 /* setup horizontal and vertical source (fetching) ends. 1843 * note: 1844 * horizontal granularity is 2 pixels, vertical granularity is 1 pixel. 1845 * look at Matrox or Neomagic bes engines code for usage example. */ 1846 //fixme: tested 15, 16 and 32-bit RGB depth, verify other depths... 1847 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1848 (((list[i].src_height + 1) << 16) | 1849 (((list[i].src_width + 1) + 0x0001) & ~0x0001)); /* SourceHeightWidth */ 1850 /* setup source pitch (b0-15). 
Set 'format origin center' (b16-17) and 1851 * select 'format interpolator foh (bilinear filtering)' (b24). */ 1852 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1853 (si->fbc.bytes_per_row | (1 << 16) | (1 << 24)); /* SourcePitch */ 1854 /* setup source surface location */ 1855 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1856 ((uint32)((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)) + 1857 (list[i].src_top * si->fbc.bytes_per_row) + (list[i].src_left * bpp); /* Offset */ 1858 /* setup source start: first (sub)pixel contributing to output picture */ 1859 /* note: 1860 * clipping is not asked for. 1861 * look at nVidia NV10+ bes engine code for useage example. */ 1862 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1863 0; /* SourceRef (b0-15 = hor, b16-31 = ver: both in 12.4 format) */ 1864 1865 i++; 1866 } 1867 1868 /* tell the engine to fetch the commands in the DMA buffer that where not 1869 * executed before. */ 1870 nv_start_dma(); 1871 } 1872 1873 /* reset surface depth settings so the other engine commands works as intended */ 1874 if (si->dm.space == B_RGB15_LITTLE) 1875 { 1876 /* wait for room in fifo for surface setup cmd if needed */ 1877 if (nv_acc_fifofree_dma(2) != B_OK) return; 1878 /* now setup 2D surface (writing 1 32bit word) */ 1879 nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 1); 1880 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000004; /* Format */ 1881 1882 /* tell the engine to fetch the commands in the DMA buffer that where not 1883 * executed before. */ 1884 nv_start_dma(); 1885 } 1886 1887 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 1888 si->engine.threeD.reload = 0xffffffff; 1889 } 1890 1891 /* scaled and filtered screen to screen blit - i.e. video playback without overlay */ 1892 /* note: source and destination may not overlap. */ 1893 //fixme? checkout NV5 and NV10 version of cmd: faster?? (or is 0x77 a 'autoselect' version?) 
1894 void OFFSCREEN_TO_SCREEN_SCALED_FILTERED_BLIT_DMA( 1895 engine_token *et, offscreen_buffer_config *config, clipped_scaled_blit_params *list, uint32 count) 1896 { 1897 uint32 i = 0; 1898 uint32 cmd_depth; 1899 uint8 bpp; 1900 1901 LOG(4,("ACC_DMA: offscreen src buffer location $%08x\n", (uint32)((uint8*)(config->buffer)))); 1902 1903 /*** init acc engine for scaled filtered blit function ***/ 1904 /* Set pixel width */ 1905 switch(config->space) 1906 { 1907 case B_RGB15_LITTLE: 1908 cmd_depth = 0x00000002; 1909 bpp = 2; 1910 break; 1911 case B_RGB16_LITTLE: 1912 cmd_depth = 0x00000007; 1913 bpp = 2; 1914 break; 1915 case B_RGB32_LITTLE: 1916 case B_RGBA32_LITTLE: 1917 cmd_depth = 0x00000004; 1918 bpp = 4; 1919 break; 1920 /* fixme sometime: 1921 * we could do the spaces below if this function would be modified to be able 1922 * to use a source outside of the desktop, i.e. using offscreen bitmaps... */ 1923 case B_YCbCr422: 1924 cmd_depth = 0x00000005; 1925 bpp = 2; 1926 break; 1927 case B_YUV422: 1928 cmd_depth = 0x00000006; 1929 bpp = 2; 1930 break; 1931 default: 1932 /* note: this function does not support src or dest in the B_CMAP8 space! */ 1933 //fixme: the NV10 version of this cmd supports B_CMAP8 src though... (checkout) 1934 LOG(8,("ACC_DMA: scaled_filtered_blit, invalid bit depth\n")); 1935 return; 1936 } 1937 1938 /* modify surface depth settings for 15-bit colorspace so command works as intended */ 1939 if (si->dm.space == B_RGB15_LITTLE) 1940 { 1941 /* wait for room in fifo for surface setup cmd if needed */ 1942 if (nv_acc_fifofree_dma(2) != B_OK) return; 1943 /* now setup 2D surface (writing 1 32bit word) */ 1944 nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 1); 1945 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000002; /* Format */ 1946 } 1947 1948 /* TNT1 has fixed operation mode 'SRCcopy' while the rest can be programmed: */ 1949 if (si->ps.card_type != NV04) 1950 { 1951 /* wait for room in fifo for cmds if needed. 
*/ 1952 if (nv_acc_fifofree_dma(5) != B_OK) return; 1953 /* now setup source bitmap colorspace */ 1954 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SETCOLORFORMAT, 2); 1955 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 1956 /* now setup operation mode to SRCcopy */ 1957 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000003; /* SetOperation */ 1958 } 1959 else 1960 { 1961 /* wait for room in fifo for cmd if needed. */ 1962 if (nv_acc_fifofree_dma(4) != B_OK) return; 1963 /* now setup source bitmap colorspace */ 1964 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SETCOLORFORMAT, 1); 1965 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 1966 /* TNT1 has fixed operation mode SRCcopy */ 1967 } 1968 /* now setup fill color (writing 2 32bit words) */ 1969 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); 1970 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000000; /* Color1A */ 1971 1972 /*** do each blit ***/ 1973 while (count--) 1974 { 1975 uint32 j = 0; 1976 uint16 clipcnt = list[i].dest_clipcount; 1977 1978 LOG(4,("ACC_DMA: offscreen src left %d, top %d\n", list[i].src_left, list[i].src_top)); 1979 LOG(4,("ACC_DMA: offscreen src width %d, height %d\n", list[i].src_width + 1, list[i].src_height + 1)); 1980 LOG(4,("ACC_DMA: offscreen dest left %d, top %d\n", list[i].dest_left, list[i].dest_top)); 1981 LOG(4,("ACC_DMA: offscreen dest width %d, height %d\n", list[i].dest_width + 1, list[i].dest_height + 1)); 1982 1983 /* wait for room in fifo for blit cmd if needed. 
*/ 1984 if (nv_acc_fifofree_dma(9 + (5 * clipcnt)) != B_OK) return; 1985 1986 /* now setup blit (writing 12 32bit words) */ 1987 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCEORG + 8, 4); 1988 /* setup destination location and size for blit */ 1989 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1990 ((list[i].dest_top << 16) | list[i].dest_left); /* DestTopLeftOutputRect */ 1991 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1992 (((list[i].dest_height + 1) << 16) | (list[i].dest_width + 1)); /* DestHeightWidthOutputRect */ 1993 /* setup scaling */ 1994 //fixme: findout scaling limits... (although the current cmd interface doesn't support them.) 1995 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1996 (((list[i].src_width + 1) << 20) / (list[i].dest_width + 1)); /* HorInvScale (in 12.20 format) */ 1997 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1998 (((list[i].src_height + 1) << 20) / (list[i].dest_height + 1)); /* VerInvScale (in 12.20 format) */ 1999 2000 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCESIZE, 3); 2001 /* setup horizontal and vertical source (fetching) ends. 2002 * note: 2003 * horizontal granularity is 2 pixels, vertical granularity is 1 pixel. 2004 * look at Matrox or Neomagic bes engines code for usage example. */ 2005 //fixme: tested 15, 16 and 32-bit RGB depth, verify other depths... 2006 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2007 (((list[i].src_height + 1) << 16) | 2008 (((list[i].src_width + 1) + 0x0001) & ~0x0001)); /* SourceHeightWidth */ 2009 /* setup source pitch (b0-15). Set 'format origin center' (b16-17) and 2010 * select 'format interpolator foh (bilinear filtering)' (b24). 
*/ 2011 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2012 (config->bytes_per_row | (1 << 16) | (1 << 24)); /* SourcePitch */ 2013 2014 /* setup source surface location */ 2015 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2016 (uint32)((uint8*)config->buffer - (uint8*)si->framebuffer + 2017 (list[i].src_top * config->bytes_per_row) + (list[i].src_left * bpp)); /* Offset */ 2018 2019 while (clipcnt--) 2020 { 2021 LOG(4,("ACC_DMA: offscreen clip left %d, top %d\n", 2022 list[i].dest_cliplist[j].left, list[i].dest_cliplist[j].top)); 2023 LOG(4,("ACC_DMA: offscreen clip width %d, height %d\n", 2024 list[i].dest_cliplist[j].width + 1, list[i].dest_cliplist[j].height + 1)); 2025 2026 /* now setup blit (writing 12 32bit words) */ 2027 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCEORG, 2); 2028 /* setup dest clipping rect for blit (b0-15 = left, b16-31 = top) */ 2029 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2030 (list[i].dest_cliplist[j].top << 16) | list[i].dest_cliplist[j].left; /* DestTopLeftClipRect */ 2031 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2032 ((list[i].dest_cliplist[j].height + 1) << 16) | (list[i].dest_cliplist[j].width + 1); /* DestHeightWidthClipRect */ 2033 2034 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCESIZE + 12, 1); 2035 /* setup source start: first (sub)pixel contributing to output picture */ 2036 /* note: 2037 * clipping is not asked for. 2038 * look at nVidia NV10+ bes engine code for useage example. */ 2039 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2040 0; /* SourceRef (b0-15 = hor, b16-31 = ver: both in 12.4 format) */ 2041 2042 j++; 2043 } 2044 2045 i++; 2046 } 2047 2048 /* tell the engine to fetch the commands in the DMA buffer that where not 2049 * executed before. 
*/ 2050 nv_start_dma(); 2051 2052 /* reset surface depth settings so the other engine commands works as intended */ 2053 if (si->dm.space == B_RGB15_LITTLE) 2054 { 2055 /* wait for room in fifo for surface setup cmd if needed */ 2056 if (nv_acc_fifofree_dma(2) != B_OK) return; 2057 /* now setup 2D surface (writing 1 32bit word) */ 2058 nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 1); 2059 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000004; /* Format */ 2060 2061 /* tell the engine to fetch the commands in the DMA buffer that where not 2062 * executed before. */ 2063 nv_start_dma(); 2064 } 2065 2066 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 2067 si->engine.threeD.reload = 0xffffffff; 2068 } 2069 2070 /* rectangle fill - i.e. workspace and window background color */ 2071 void FILL_RECTANGLE_DMA(engine_token *et, uint32 colorIndex, fill_rect_params *list, uint32 count) 2072 { 2073 uint32 i = 0; 2074 uint16 subcnt; 2075 2076 /*** init acc engine for fill function ***/ 2077 /* ROP registers (Raster OPeration): 2078 * wait for room in fifo for ROP and bitmap cmd if needed. */ 2079 if (nv_acc_fifofree_dma(4) != B_OK) return; 2080 /* now setup ROP (writing 2 32bit words) for GXcopy */ 2081 nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); 2082 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xcc; /* SetRop5 */ 2083 /* now setup fill color (writing 2 32bit words) */ 2084 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); 2085 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = colorIndex; /* Color1A */ 2086 2087 /*** draw each rectangle ***/ 2088 while (count) 2089 { 2090 /* break up the list in sublists to minimize calls, while making sure long 2091 * lists still get executed without trouble */ 2092 subcnt = 32; 2093 if (count < 32) subcnt = count; 2094 count -= subcnt; 2095 2096 /* wait for room in fifo for bitmap cmd if needed. 
*/ 2097 if (nv_acc_fifofree_dma(1 + (2 * subcnt)) != B_OK) return; 2098 2099 /* issue fill command once... */ 2100 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, (2 * subcnt)); 2101 /* ... and send multiple rects (engine cmd supports 32 max) */ 2102 while (subcnt--) 2103 { 2104 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2105 (((list[i].left) << 16) | ((list[i].top) & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */ 2106 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2107 (((((list[i].right)+1) - (list[i].left)) << 16) | 2108 (((list[i].bottom-list[i].top)+1) & 0x0000ffff)); /* Unclipped Rect 0 WidthHeight */ 2109 2110 i++; 2111 } 2112 2113 /* tell the engine to fetch the commands in the DMA buffer that where not 2114 * executed before. */ 2115 nv_start_dma(); 2116 } 2117 2118 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 2119 si->engine.threeD.reload = 0xffffffff; 2120 } 2121 2122 /* span fill - i.e. (selected) menuitem background color (Dano) */ 2123 void FILL_SPAN_DMA(engine_token *et, uint32 colorIndex, uint16 *list, uint32 count) 2124 { 2125 uint32 i = 0; 2126 uint16 subcnt; 2127 2128 /*** init acc engine for fill function ***/ 2129 /* ROP registers (Raster OPeration): 2130 * wait for room in fifo for ROP and bitmap cmd if needed. 
*/ 2131 if (nv_acc_fifofree_dma(4) != B_OK) return; 2132 /* now setup ROP (writing 2 32bit words) for GXcopy */ 2133 nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); 2134 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xcc; /* SetRop5 */ 2135 /* now setup fill color (writing 2 32bit words) */ 2136 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); 2137 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = colorIndex; /* Color1A */ 2138 2139 /*** draw each span ***/ 2140 while (count) 2141 { 2142 /* break up the list in sublists to minimize calls, while making sure long 2143 * lists still get executed without trouble */ 2144 subcnt = 32; 2145 if (count < 32) subcnt = count; 2146 count -= subcnt; 2147 2148 /* wait for room in fifo for bitmap cmd if needed. */ 2149 if (nv_acc_fifofree_dma(1 + (2 * subcnt)) != B_OK) return; 2150 2151 /* issue fill command once... */ 2152 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, (2 * subcnt)); 2153 /* ... and send multiple rects (spans) (engine cmd supports 32 max) */ 2154 while (subcnt--) 2155 { 2156 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2157 (((list[i+1]) << 16) | ((list[i]) & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */ 2158 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2159 ((((list[i+2]+1) - (list[i+1])) << 16) | 0x00000001); /* Unclipped Rect 0 WidthHeight */ 2160 2161 i+=3; 2162 } 2163 2164 /* tell the engine to fetch the commands in the DMA buffer that where not 2165 * executed before. */ 2166 nv_start_dma(); 2167 } 2168 2169 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 2170 si->engine.threeD.reload = 0xffffffff; 2171 } 2172 2173 /* rectangle invert - i.e. 
text cursor and text selection */ 2174 void INVERT_RECTANGLE_DMA(engine_token *et, fill_rect_params *list, uint32 count) 2175 { 2176 uint32 i = 0; 2177 uint16 subcnt; 2178 2179 /*** init acc engine for invert function ***/ 2180 /* ROP registers (Raster OPeration): 2181 * wait for room in fifo for ROP and bitmap cmd if needed. */ 2182 if (nv_acc_fifofree_dma(4) != B_OK) return; 2183 /* now setup ROP (writing 2 32bit words) for GXinvert */ 2184 nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); 2185 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x55; /* SetRop5 */ 2186 /* now reset fill color (writing 2 32bit words) */ 2187 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); 2188 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000000; /* Color1A */ 2189 2190 /*** invert each rectangle ***/ 2191 while (count) 2192 { 2193 /* break up the list in sublists to minimize calls, while making sure long 2194 * lists still get executed without trouble */ 2195 subcnt = 32; 2196 if (count < 32) subcnt = count; 2197 count -= subcnt; 2198 2199 /* wait for room in fifo for bitmap cmd if needed. */ 2200 if (nv_acc_fifofree_dma(1 + (2 * subcnt)) != B_OK) return; 2201 2202 /* issue fill command once... */ 2203 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, (2 * subcnt)); 2204 /* ... and send multiple rects (engine cmd supports 32 max) */ 2205 while (subcnt--) 2206 { 2207 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2208 (((list[i].left) << 16) | ((list[i].top) & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */ 2209 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2210 (((((list[i].right)+1) - (list[i].left)) << 16) | 2211 (((list[i].bottom-list[i].top)+1) & 0x0000ffff)); /* Unclipped Rect 0 WidthHeight */ 2212 2213 i++; 2214 } 2215 2216 /* tell the engine to fetch the commands in the DMA buffer that where not 2217 * executed before. 
*/ 2218 nv_start_dma(); 2219 } 2220 2221 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 2222 si->engine.threeD.reload = 0xffffffff; 2223 } 2224