1 /* NV Acceleration functions */ 2 3 /* Author: 4 Rudolf Cornelissen 8/2003-6/2010. 5 6 This code was possible thanks to: 7 - the Linux XFree86 NV driver, 8 - the Linux UtahGLX 3D driver. 9 */ 10 11 #define MODULE_BIT 0x00080000 12 13 #include "nv_std.h" 14 15 /*acceleration notes*/ 16 17 /*functions Be's app_server uses: 18 fill span (horizontal only) 19 fill rectangle (these 2 are very similar) 20 invert rectangle 21 blit 22 */ 23 24 static void nv_init_for_3D_dma(void); 25 static void nv_start_dma(void); 26 static status_t nv_acc_fifofree_dma(uint16 cmd_size); 27 static void nv_acc_cmd_dma(uint32 cmd, uint16 offset, uint16 size); 28 static void nv_acc_set_ch_dma(uint16 ch, uint32 handle); 29 30 /* used to track engine DMA stalls */ 31 static uint8 err; 32 33 /* wait until engine completely idle */ 34 status_t nv_acc_wait_idle_dma() 35 { 36 /* we'd better check for timeouts on the DMA engine as it's theoretically 37 * breakable by malfunctioning software */ 38 uint16 cnt = 0; 39 40 /* wait until all upcoming commands are in execution at least. Do this until 41 * we hit a timeout; abort if we failed at least three times before: 42 * if DMA stalls, we have to forget about it alltogether at some point, or 43 * the system will almost come to a complete halt.. */ 44 /* note: 45 * it doesn't matter which FIFO channel's DMA registers we access, they are in 46 * fact all the same set. It also doesn't matter if the channel was assigned a 47 * command or not. */ 48 while ((NV_REG32(NVACC_FIFO + NV_GENERAL_DMAGET) != (si->engine.dma.put << 2)) && 49 (cnt < 10000) && (err < 3)) 50 { 51 /* snooze a bit so I do not hammer the bus */ 52 snooze (100); 53 cnt++; 54 } 55 56 /* log timeout if we had one */ 57 if (cnt == 10000) 58 { 59 if (err < 3) err++; 60 LOG(4,("ACC_DMA: wait_idle; DMA timeout #%d, engine trouble!\n", err)); 61 } 62 63 /* wait until execution completed */ 64 while (ACCR(STATUS)) 65 { 66 /* snooze a bit so I do not hammer the bus */ 67 snooze (100); 68 } 69 70 return B_OK; 71 } 72 73 /* AFAIK this must be done for every new screenmode. 74 * Engine required init. */ 75 status_t nv_acc_init_dma() 76 { 77 uint32 cnt, tmp; 78 uint32 surf_depth, cmd_depth; 79 /* reset the engine DMA stalls counter */ 80 err = 0; 81 82 /* a hanging engine only recovers from a complete power-down/power-up cycle */ 83 NV_REG32(NV32_PWRUPCTRL) = 0xffff00ff; 84 snooze(1000); 85 NV_REG32(NV32_PWRUPCTRL) = 0xffffffff; 86 87 /* don't try this on NV20 and later.. */ 88 /* note: 89 * the specific register that's responsible for the speedfix on NV18 is 90 * $00400ed8: bit 6 needs to be zero for fastest rendering (confirmed). */ 91 /* note also: 92 * on NV28 the following ranges could be reset (confirmed): 93 * $00400000 upto/incl. $004002fc; 94 * $00400400 upto/incl. $004017fc; 95 * $0040180c upto/incl. $00401948; 96 * $00401994 upto/incl. $00401a80; 97 * $00401a94 upto/incl. $00401ffc. 98 * The intermediate ranges hang the engine upon resetting. */ 99 if (si->ps.card_arch < NV20A) 100 { 101 /* actively reset the PGRAPH registerset (acceleration engine) */ 102 for (cnt = 0x00400000; cnt < 0x00402000; cnt +=4) 103 { 104 NV_REG32(cnt) = 0x00000000; 105 } 106 } 107 108 /* setup PTIMER: */ 109 LOG(4,("ACC_DMA: timer numerator $%08x, denominator $%08x\n", ACCR(PT_NUMERATOR), ACCR(PT_DENOMINATR))); 110 111 /* The NV28 BIOS programs PTIMER like this (see coldstarting in nv_info.c) */ 112 //ACCW(PT_NUMERATOR, (si->ps.std_engine_clock * 20)); 113 //ACCW(PT_DENOMINATR, 0x00000271); 114 /* Nouveau (march 2009) mentions something like: writing 8 and 3 to these regs breaks the timings 115 * on the LVDS hardware sequencing microcode. A correct solution involves calculations with the GPU PLL. */ 116 117 /* For now use BIOS pre-programmed values if there */ 118 if (!ACCR(PT_NUMERATOR) || !ACCR(PT_DENOMINATR)) { 119 /* set timer numerator to 8 (in b0-15) */ 120 ACCW(PT_NUMERATOR, 0x00000008); 121 /* set timer denominator to 3 (in b0-15) */ 122 ACCW(PT_DENOMINATR, 0x00000003); 123 } 124 125 /* disable timer-alarm INT requests (b0) */ 126 ACCW(PT_INTEN, 0x00000000); 127 /* reset timer-alarm INT status bit (b0) */ 128 ACCW(PT_INTSTAT, 0xffffffff); 129 130 /* enable PRAMIN write access on pre NV10 before programming it! */ 131 if (si->ps.card_arch == NV04A) 132 { 133 /* set framebuffer config: type = notiling, PRAMIN write access enabled */ 134 NV_REG32(NV32_PFB_CONFIG_0) = 0x00001114; 135 } 136 else 137 { 138 /* setup acc engine 'source' tile adressranges */ 139 if ((si->ps.card_type <= NV40) || (si->ps.card_type == NV45)) 140 { 141 ACCW(NV10_FBTIL0AD, 0); 142 ACCW(NV10_FBTIL1AD, 0); 143 ACCW(NV10_FBTIL2AD, 0); 144 ACCW(NV10_FBTIL3AD, 0); 145 ACCW(NV10_FBTIL4AD, 0); 146 ACCW(NV10_FBTIL5AD, 0); 147 ACCW(NV10_FBTIL6AD, 0); 148 ACCW(NV10_FBTIL7AD, 0); 149 ACCW(NV10_FBTIL0ED, (si->ps.memory_size - 1)); 150 ACCW(NV10_FBTIL1ED, (si->ps.memory_size - 1)); 151 ACCW(NV10_FBTIL2ED, (si->ps.memory_size - 1)); 152 ACCW(NV10_FBTIL3ED, (si->ps.memory_size - 1)); 153 ACCW(NV10_FBTIL4ED, (si->ps.memory_size - 1)); 154 ACCW(NV10_FBTIL5ED, (si->ps.memory_size - 1)); 155 ACCW(NV10_FBTIL6ED, (si->ps.memory_size - 1)); 156 ACCW(NV10_FBTIL7ED, (si->ps.memory_size - 1)); 157 } 158 else 159 { 160 /* NV41, 43, 44, G70 and up */ 161 ACCW(NV41_FBTIL0AD, 0); 162 ACCW(NV41_FBTIL1AD, 0); 163 ACCW(NV41_FBTIL2AD, 0); 164 ACCW(NV41_FBTIL3AD, 0); 165 ACCW(NV41_FBTIL4AD, 0); 166 ACCW(NV41_FBTIL5AD, 0); 167 ACCW(NV41_FBTIL6AD, 0); 168 ACCW(NV41_FBTIL7AD, 0); 169 ACCW(NV41_FBTIL8AD, 0); 170 ACCW(NV41_FBTIL9AD, 0); 171 ACCW(NV41_FBTILAAD, 0); 172 ACCW(NV41_FBTILBAD, 0); 173 ACCW(NV41_FBTIL0ED, (si->ps.memory_size - 1)); 174 ACCW(NV41_FBTIL1ED, (si->ps.memory_size - 1)); 175 ACCW(NV41_FBTIL2ED, (si->ps.memory_size - 1)); 176 ACCW(NV41_FBTIL3ED, (si->ps.memory_size - 1)); 177 ACCW(NV41_FBTIL4ED, (si->ps.memory_size - 1)); 178 ACCW(NV41_FBTIL5ED, (si->ps.memory_size - 1)); 179 ACCW(NV41_FBTIL6ED, (si->ps.memory_size - 1)); 180 ACCW(NV41_FBTIL7ED, (si->ps.memory_size - 1)); 181 ACCW(NV41_FBTIL8ED, (si->ps.memory_size - 1)); 182 ACCW(NV41_FBTIL9ED, (si->ps.memory_size - 1)); 183 ACCW(NV41_FBTILAED, (si->ps.memory_size - 1)); 184 ACCW(NV41_FBTILBED, (si->ps.memory_size - 1)); 185 186 if (si->ps.card_type >= G70) 187 { 188 ACCW(G70_FBTILCAD, 0); 189 ACCW(G70_FBTILDAD, 0); 190 ACCW(G70_FBTILEAD, 0); 191 ACCW(G70_FBTILCED, (si->ps.memory_size - 1)); 192 ACCW(G70_FBTILDED, (si->ps.memory_size - 1)); 193 ACCW(G70_FBTILEED, (si->ps.memory_size - 1)); 194 } 195 } 196 } 197 198 /*** PRAMIN ***/ 199 /* first clear the entire RAMHT (hash-table) space to a defined state. It turns 200 * out at least NV11 will keep the previously programmed handles over resets and 201 * power-outages upto about 15 seconds!! Faulty entries might well hang the 202 * engine (confirmed on NV11). 203 * Note: 204 * this behaviour is not very strange: even very old DRAM chips are known to be 205 * able to do this, even though you should refresh them every few milliseconds or 206 * so. (Large memory cell capacitors, though different cells vary a lot in their 207 * capacity.) 208 * Of course data validity is not certain by a long shot over this large 209 * amount of time.. */ 210 for(cnt = 0; cnt < 0x0400; cnt++) 211 NV_REG32(NVACC_HT_HANDL_00 + (cnt << 2)) = 0; 212 /* RAMHT (hash-table) space SETUP FIFO HANDLES */ 213 /* note: 214 * 'instance' tells you where the engine command is stored in 'PR_CTXx_x' sets 215 * below: instance being b4-19 with baseadress NV_PRAMIN_CTX_0 (0x00700000). 216 * That command is linked to the handle noted here. This handle is then used to 217 * tell the FIFO to which engine command it is connected! 218 * (CTX registers are actually a sort of RAM space.) */ 219 if (si->ps.card_arch >= NV40A) 220 { 221 /* (first set) */ 222 ACCW(HT_HANDL_00, (0x80000000 | NV10_CONTEXT_SURFACES_2D)); /* 32bit handle (not used) */ 223 ACCW(HT_VALUE_00, 0x0010114c); /* instance $114c, engine = acc engine, CHID = $00 */ 224 225 ACCW(HT_HANDL_01, (0x80000000 | NV_IMAGE_BLIT)); /* 32bit handle */ 226 ACCW(HT_VALUE_01, 0x00101148); /* instance $1148, engine = acc engine, CHID = $00 */ 227 228 ACCW(HT_HANDL_02, (0x80000000 | NV4_GDI_RECTANGLE_TEXT)); /* 32bit handle */ 229 ACCW(HT_VALUE_02, 0x0010114a); /* instance $114a, engine = acc engine, CHID = $00 */ 230 231 /* (second set) */ 232 ACCW(HT_HANDL_10, (0x80000000 | NV_ROP5_SOLID)); /* 32bit handle */ 233 ACCW(HT_VALUE_10, 0x00101142); /* instance $1142, engine = acc engine, CHID = $00 */ 234 235 ACCW(HT_HANDL_11, (0x80000000 | NV_IMAGE_BLACK_RECTANGLE)); /* 32bit handle */ 236 ACCW(HT_VALUE_11, 0x00101144); /* instance $1144, engine = acc engine, CHID = $00 */ 237 238 ACCW(HT_HANDL_12, (0x80000000 | NV_IMAGE_PATTERN)); /* 32bit handle */ 239 ACCW(HT_VALUE_12, 0x00101146); /* instance $1146, engine = acc engine, CHID = $00 */ 240 241 ACCW(HT_HANDL_13, (0x80000000 | NV_SCALED_IMAGE_FROM_MEMORY)); /* 32bit handle */ 242 ACCW(HT_VALUE_13, 0x0010114e); /* instance $114e, engine = acc engine, CHID = $00 */ 243 } 244 else 245 { 246 /* (first set) */ 247 ACCW(HT_HANDL_00, (0x80000000 | NV4_SURFACE)); /* 32bit handle */ 248 ACCW(HT_VALUE_00, 0x80011145); /* instance $1145, engine = acc engine, CHID = $00 */ 249 250 ACCW(HT_HANDL_01, (0x80000000 | NV_IMAGE_BLIT)); /* 32bit handle */ 251 ACCW(HT_VALUE_01, 0x80011146); /* instance $1146, engine = acc engine, CHID = $00 */ 252 253 ACCW(HT_HANDL_02, (0x80000000 | NV4_GDI_RECTANGLE_TEXT)); /* 32bit handle */ 254 ACCW(HT_VALUE_02, 0x80011147); /* instance $1147, engine = acc engine, CHID = $00 */ 255 256 ACCW(HT_HANDL_03, (0x80000000 | NV4_CONTEXT_SURFACES_ARGB_ZS)); /* 32bit handle (3D) */ 257 ACCW(HT_VALUE_03, 0x80011148); /* instance $1148, engine = acc engine, CHID = $00 */ 258 259 /* NV4_ and NV10_DX5_TEXTURE_TRIANGLE should be identical */ 260 ACCW(HT_HANDL_04, (0x80000000 | NV4_DX5_TEXTURE_TRIANGLE)); /* 32bit handle (3D) */ 261 ACCW(HT_VALUE_04, 0x80011149); /* instance $1149, engine = acc engine, CHID = $00 */ 262 263 /* NV4_ and NV10_DX6_MULTI_TEXTURE_TRIANGLE should be identical */ 264 ACCW(HT_HANDL_05, (0x80000000 | NV4_DX6_MULTI_TEXTURE_TRIANGLE)); /* 32bit handle (not used) */ 265 ACCW(HT_VALUE_05, 0x8001114a); /* instance $114a, engine = acc engine, CHID = $00 */ 266 267 ACCW(HT_HANDL_06, (0x80000000 | NV1_RENDER_SOLID_LIN)); /* 32bit handle (not used) */ 268 ACCW(HT_VALUE_06, 0x8001114c); /* instance $114c, engine = acc engine, CHID = $00 */ 269 270 /* (second set) */ 271 ACCW(HT_HANDL_10, (0x80000000 | NV_ROP5_SOLID)); /* 32bit handle */ 272 ACCW(HT_VALUE_10, 0x80011142); /* instance $1142, engine = acc engine, CHID = $00 */ 273 274 ACCW(HT_HANDL_11, (0x80000000 | NV_IMAGE_BLACK_RECTANGLE)); /* 32bit handle */ 275 ACCW(HT_VALUE_11, 0x80011143); /* instance $1143, engine = acc engine, CHID = $00 */ 276 277 ACCW(HT_HANDL_12, (0x80000000 | NV_IMAGE_PATTERN)); /* 32bit handle */ 278 ACCW(HT_VALUE_12, 0x80011144); /* instance $1144, engine = acc engine, CHID = $00 */ 279 280 ACCW(HT_HANDL_13, (0x80000000 | NV_SCALED_IMAGE_FROM_MEMORY)); /* 32bit handle */ 281 ACCW(HT_VALUE_13, 0x8001114b); /* instance $114b, engine = acc engine, CHID = $00 */ 282 283 //2007 3D tests.. 284 if (si->ps.card_type == NV15) 285 { 286 ACCW(HT_HANDL_14, (0x80000000 | NV_TCL_PRIMITIVE_3D)); /* 32bit handle */ 287 ACCW(HT_VALUE_14, 0x8001114d); /* instance $114d, engine = acc engine, CHID = $00 */ 288 } 289 290 } 291 292 /* program CTX registers: CTX1 is mostly done later (colorspace dependant) */ 293 /* note: 294 * CTX determines which HT handles point to what engine commands. */ 295 /* note also: 296 * CTX registers are in fact in the same GPU internal RAM space as the engine's 297 * hashtable. This means that stuff programmed in here also survives resets and 298 * power-outages! (confirmed NV11) */ 299 if (si->ps.card_arch >= NV40A) 300 { 301 /* setup a DMA define for use by command defines below. */ 302 ACCW(PR_CTX0_R, 0x00003000); /* DMA page table present and of linear type; 303 * DMA target node is NVM (non-volatile memory?) 304 * (instead of doing PCI or AGP transfers) */ 305 ACCW(PR_CTX1_R, (si->ps.memory_size - 1)); /* DMA limit: size is all cardRAM */ 306 ACCW(PR_CTX2_R, ((0x00000000 & 0xfffff000) | 0x00000002)); 307 /* DMA access type is READ_AND_WRITE; 308 * memory starts at start of cardRAM (b12-31): 309 * It's adress needs to be at a 4kb boundary! */ 310 ACCW(PR_CTX3_R, 0x00000002); /* unknown (looks like this is rubbish/not needed?) */ 311 /* setup set '0' for cmd NV_ROP5_SOLID */ 312 ACCW(PR_CTX0_0, 0x02080043); /* NVclass $043, patchcfg ROP_AND, nv10+: little endian */ 313 ACCW(PR_CTX1_0, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 314 ACCW(PR_CTX2_0, 0x00000000); /* DMA0 and DMA1 instance invalid */ 315 ACCW(PR_CTX3_0, 0x00000000); /* method traps disabled */ 316 ACCW(PR_CTX0_1, 0x00000000); /* extra */ 317 ACCW(PR_CTX1_1, 0x00000000); /* extra */ 318 /* setup set '1' for cmd NV_IMAGE_BLACK_RECTANGLE */ 319 ACCW(PR_CTX0_2, 0x02080019); /* NVclass $019, patchcfg ROP_AND, nv10+: little endian */ 320 ACCW(PR_CTX1_2, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 321 ACCW(PR_CTX2_2, 0x00000000); /* DMA0 and DMA1 instance invalid */ 322 ACCW(PR_CTX3_2, 0x00000000); /* method traps disabled */ 323 ACCW(PR_CTX0_3, 0x00000000); /* extra */ 324 ACCW(PR_CTX1_3, 0x00000000); /* extra */ 325 /* setup set '2' for cmd NV_IMAGE_PATTERN */ 326 ACCW(PR_CTX0_4, 0x02080018); /* NVclass $018, patchcfg ROP_AND, nv10+: little endian */ 327 ACCW(PR_CTX1_4, 0x02000000); /* colorspace not set, notify instance is $0200 (b16-31) */ 328 ACCW(PR_CTX2_4, 0x00000000); /* DMA0 and DMA1 instance invalid */ 329 ACCW(PR_CTX3_4, 0x00000000); /* method traps disabled */ 330 ACCW(PR_CTX0_5, 0x00000000); /* extra */ 331 ACCW(PR_CTX1_5, 0x00000000); /* extra */ 332 /* setup set '4' for cmd NV12_IMAGE_BLIT */ 333 ACCW(PR_CTX0_6, 0x0208009f); /* NVclass $09f, patchcfg ROP_AND, nv10+: little endian */ 334 ACCW(PR_CTX1_6, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 335 ACCW(PR_CTX2_6, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */ 336 ACCW(PR_CTX3_6, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */ 337 ACCW(PR_CTX0_7, 0x00000000); /* extra */ 338 ACCW(PR_CTX1_7, 0x00000000); /* extra */ 339 /* setup set '5' for cmd NV4_GDI_RECTANGLE_TEXT */ 340 ACCW(PR_CTX0_8, 0x0208004a); /* NVclass $04a, patchcfg ROP_AND, nv10+: little endian */ 341 ACCW(PR_CTX1_8, 0x02000000); /* colorspace not set, notify instance is $0200 (b16-31) */ 342 ACCW(PR_CTX2_8, 0x00000000); /* DMA0 and DMA1 instance invalid */ 343 ACCW(PR_CTX3_8, 0x00000000); /* method traps disabled */ 344 ACCW(PR_CTX0_9, 0x00000000); /* extra */ 345 ACCW(PR_CTX1_9, 0x00000000); /* extra */ 346 /* setup set '6' for cmd NV10_CONTEXT_SURFACES_2D */ 347 ACCW(PR_CTX0_A, 0x02080062); /* NVclass $062, nv10+: little endian */ 348 ACCW(PR_CTX1_A, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 349 ACCW(PR_CTX2_A, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */ 350 ACCW(PR_CTX3_A, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */ 351 ACCW(PR_CTX0_B, 0x00000000); /* extra */ 352 ACCW(PR_CTX1_B, 0x00000000); /* extra */ 353 /* setup set '7' for cmd NV_SCALED_IMAGE_FROM_MEMORY */ 354 ACCW(PR_CTX0_C, 0x02080077); /* NVclass $077, nv10+: little endian */ 355 ACCW(PR_CTX1_C, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 356 ACCW(PR_CTX2_C, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */ 357 ACCW(PR_CTX3_C, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */ 358 ACCW(PR_CTX0_D, 0x00000000); /* extra */ 359 ACCW(PR_CTX1_D, 0x00000000); /* extra */ 360 /* setup DMA set pointed at by PF_CACH1_DMAI */ 361 ACCW(PR_CTX0_E, 0x00003002); /* DMA page table present and of linear type; 362 * DMA class is $002 (b0-11); 363 * DMA target node is NVM (non-volatile memory?) 364 * (instead of doing PCI or AGP transfers) */ 365 ACCW(PR_CTX1_E, 0x00007fff); /* DMA limit: tablesize is 32k bytes */ 366 ACCW(PR_CTX2_E, (((si->ps.memory_size - 1) & 0xffff8000) | 0x00000002)); 367 /* DMA access type is READ_AND_WRITE; 368 * table is located at end of cardRAM (b12-31): 369 * It's adress needs to be at a 4kb boundary! */ 370 } 371 else 372 { 373 /* setup a DMA define for use by command defines below. */ 374 ACCW(PR_CTX0_R, 0x00003000); /* DMA page table present and of linear type; 375 * DMA target node is NVM (non-volatile memory?) 376 * (instead of doing PCI or AGP transfers) */ 377 ACCW(PR_CTX1_R, (si->ps.memory_size - 1)); /* DMA limit: size is all cardRAM */ 378 ACCW(PR_CTX2_R, ((0x00000000 & 0xfffff000) | 0x00000002)); 379 /* DMA access type is READ_AND_WRITE; 380 * memory starts at start of cardRAM (b12-31): 381 * It's adress needs to be at a 4kb boundary! */ 382 ACCW(PR_CTX3_R, 0x00000002); /* unknown (looks like this is rubbish/not needed?) */ 383 /* setup set '0' for cmd NV_ROP5_SOLID */ 384 ACCW(PR_CTX0_0, 0x01008043); /* NVclass $043, patchcfg ROP_AND, nv10+: little endian */ 385 ACCW(PR_CTX1_0, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 386 ACCW(PR_CTX2_0, 0x00000000); /* DMA0 and DMA1 instance invalid */ 387 ACCW(PR_CTX3_0, 0x00000000); /* method traps disabled */ 388 /* setup set '1' for cmd NV_IMAGE_BLACK_RECTANGLE */ 389 ACCW(PR_CTX0_1, 0x01008019); /* NVclass $019, patchcfg ROP_AND, nv10+: little endian */ 390 ACCW(PR_CTX1_1, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 391 ACCW(PR_CTX2_1, 0x00000000); /* DMA0 and DMA1 instance invalid */ 392 ACCW(PR_CTX3_1, 0x00000000); /* method traps disabled */ 393 /* setup set '2' for cmd NV_IMAGE_PATTERN */ 394 ACCW(PR_CTX0_2, 0x01008018); /* NVclass $018, patchcfg ROP_AND, nv10+: little endian */ 395 ACCW(PR_CTX1_2, 0x00000002); /* colorspace not set, notify instance is $0200 (b16-31) */ 396 ACCW(PR_CTX2_2, 0x00000000); /* DMA0 and DMA1 instance invalid */ 397 ACCW(PR_CTX3_2, 0x00000000); /* method traps disabled */ 398 /* setup set '3' for ... */ 399 if(si->ps.card_arch >= NV10A) 400 { 401 /* ... cmd NV10_CONTEXT_SURFACES_2D */ 402 ACCW(PR_CTX0_3, 0x01008062); /* NVclass $062, nv10+: little endian */ 403 } 404 else 405 { 406 /* ... cmd NV4_SURFACE */ 407 ACCW(PR_CTX0_3, 0x01008042); /* NVclass $042, nv10+: little endian */ 408 } 409 ACCW(PR_CTX1_3, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 410 ACCW(PR_CTX2_3, 0x11401140); /* DMA0 instance is $1140, DMA1 instance invalid */ 411 ACCW(PR_CTX3_3, 0x00000000); /* method trap 0 is $1140, trap 1 disabled */ 412 /* setup set '4' for ... */ 413 if (si->ps.card_type >= NV11) 414 { 415 /* ... cmd NV12_IMAGE_BLIT */ 416 ACCW(PR_CTX0_4, 0x0100809f); /* NVclass $09f, patchcfg ROP_AND, nv10+: little endian */ 417 } 418 else 419 { 420 /* ... cmd NV_IMAGE_BLIT */ 421 ACCW(PR_CTX0_4, 0x0100805f); /* NVclass $05f, patchcfg ROP_AND, nv10+: little endian */ 422 } 423 ACCW(PR_CTX1_4, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 424 ACCW(PR_CTX2_4, 0x11401140); /* DMA0 instance is $1140, DMA1 instance invalid */ 425 ACCW(PR_CTX3_4, 0x00000000); /* method trap 0 is $1140, trap 1 disabled */ 426 /* setup set '5' for cmd NV4_GDI_RECTANGLE_TEXT */ 427 ACCW(PR_CTX0_5, 0x0100804a); /* NVclass $04a, patchcfg ROP_AND, nv10+: little endian */ 428 ACCW(PR_CTX1_5, 0x00000002); /* colorspace not set, notify instance is $0200 (b16-31) */ 429 ACCW(PR_CTX2_5, 0x00000000); /* DMA0 and DMA1 instance invalid */ 430 ACCW(PR_CTX3_5, 0x00000000); /* method traps disabled */ 431 /* setup set '6' ... */ 432 if (si->ps.card_arch >= NV10A) 433 { 434 /* ... for cmd NV10_CONTEXT_SURFACES_ARGB_ZS */ 435 ACCW(PR_CTX0_6, 0x00000093); /* NVclass $093, nv10+: little endian */ 436 } 437 else 438 { 439 /* ... for cmd NV4_CONTEXT_SURFACES_ARGB_ZS */ 440 ACCW(PR_CTX0_6, 0x00000053); /* NVclass $053, nv10+: little endian */ 441 } 442 ACCW(PR_CTX1_6, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 443 ACCW(PR_CTX2_6, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 444 ACCW(PR_CTX3_6, 0x00000000); /* method traps disabled */ 445 /* setup set '7' ... */ 446 if (si->ps.card_arch >= NV10A) 447 { 448 /* ... for cmd NV10_DX5_TEXTURE_TRIANGLE */ 449 ACCW(PR_CTX0_7, 0x0300a094); /* NVclass $094, patchcfg ROP_AND, userclip enable, 450 * context surface0 valid, nv10+: little endian */ 451 } 452 else 453 { 454 /* ... for cmd NV4_DX5_TEXTURE_TRIANGLE */ 455 ACCW(PR_CTX0_7, 0x0300a054); /* NVclass $054, patchcfg ROP_AND, userclip enable, 456 * context surface0 valid */ 457 } 458 ACCW(PR_CTX1_7, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 459 ACCW(PR_CTX2_7, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 460 ACCW(PR_CTX3_7, 0x00000000); /* method traps disabled */ 461 /* setup set '8' ... */ 462 if (si->ps.card_arch >= NV10A) 463 { 464 /* ... for cmd NV10_DX6_MULTI_TEXTURE_TRIANGLE (not used) */ 465 ACCW(PR_CTX0_8, 0x0300a095); /* NVclass $095, patchcfg ROP_AND, userclip enable, 466 * context surface0 valid, nv10+: little endian */ 467 } 468 else 469 { 470 /* ... for cmd NV4_DX6_MULTI_TEXTURE_TRIANGLE (not used) */ 471 ACCW(PR_CTX0_8, 0x0300a055); /* NVclass $055, patchcfg ROP_AND, userclip enable, 472 * context surface0 valid */ 473 } 474 ACCW(PR_CTX1_8, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 475 ACCW(PR_CTX2_8, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 476 ACCW(PR_CTX3_8, 0x00000000); /* method traps disabled */ 477 /* setup set '9' for cmd NV_SCALED_IMAGE_FROM_MEMORY */ 478 ACCW(PR_CTX0_9, 0x01018077); /* NVclass $077, patchcfg SRC_COPY, 479 * context surface0 valid, nv10+: little endian */ 480 ACCW(PR_CTX1_9, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 481 ACCW(PR_CTX2_9, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 482 ACCW(PR_CTX3_9, 0x00000000); /* method traps disabled */ 483 /* setup set 'A' for cmd NV1_RENDER_SOLID_LIN (not used) */ 484 ACCW(PR_CTX0_A, 0x0300a01c); /* NVclass $01c, patchcfg ROP_AND, userclip enable, 485 * context surface0 valid, nv10+: little endian */ 486 ACCW(PR_CTX1_A, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 487 ACCW(PR_CTX2_A, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 488 ACCW(PR_CTX3_A, 0x00000000); /* method traps disabled */ 489 //2007 3D tests.. 490 /* setup set 'B' ... */ 491 if (si->ps.card_type == NV15) 492 { 493 /* ... for cmd NV11_TCL_PRIMITIVE_3D */ 494 ACCW(PR_CTX0_B, 0x0300a096); /* NVclass $096, patchcfg ROP_AND, userclip enable, 495 * context surface0 valid, nv10+: little endian */ 496 ACCW(PR_CTX1_B, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 497 ACCW(PR_CTX2_B, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 498 ACCW(PR_CTX3_B, 0x00000000); /* method traps disabled */ 499 } 500 /* setup DMA set pointed at by PF_CACH1_DMAI */ 501 if (si->engine.agp_mode) 502 { 503 /* DMA page table present and of linear type; 504 * DMA class is $002 (b0-11); 505 * DMA target node is AGP */ 506 ACCW(PR_CTX0_C, 0x00033002); 507 } 508 else 509 { 510 /* DMA page table present and of linear type; 511 * DMA class is $002 (b0-11); 512 * DMA target node is PCI */ 513 ACCW(PR_CTX0_C, 0x00023002); 514 } 515 ACCW(PR_CTX1_C, 0x000fffff); /* DMA limit: tablesize is 1M bytes */ 516 ACCW(PR_CTX2_C, (((uintptr_t)((uint8 *)(si->dma_buffer_pci))) | 0x00000002)); 517 /* DMA access type is READ_AND_WRITE; 518 * table is located in main system RAM (b12-31): 519 * It's adress needs to be at a 4kb boundary! */ 520 521 /* set the 3D rendering functions colordepth via BPIXEL's 'depth 2' */ 522 /* note: 523 * setting a depth to 'invalid' (zero) makes the engine report 524 * ready with drawing 'immediately'. */ 525 //fixme: NV30A and above (probably) needs to be corrected... 526 switch(si->dm.space) 527 { 528 case B_CMAP8: 529 if (si->ps.card_arch < NV30A) 530 /* set depth 2: $1 = Y8 */ 531 ACCW(BPIXEL, 0x00000100); 532 else 533 /* set depth 0-1: $1 = Y8, $2 = X1R5G5B5_Z1R5G5B5 */ 534 ACCW(BPIXEL, 0x00000021); 535 break; 536 case B_RGB15_LITTLE: 537 if (si->ps.card_arch < NV30A) 538 /* set depth 2: $4 = A1R5G5B5 */ 539 ACCW(BPIXEL, 0x00000400); 540 else 541 /* set depth 0-1: $2 = X1R5G5B5_Z1R5G5B5, $4 = A1R5G5B5 */ 542 ACCW(BPIXEL, 0x00000042); 543 break; 544 case B_RGB16_LITTLE: 545 if (si->ps.card_arch < NV30A) 546 /* set depth 2: $5 = R5G6B5 */ 547 ACCW(BPIXEL, 0x00000500); 548 else 549 /* set depth 0-1: $5 = R5G6B5, $a = X1A7R8G8B8_O1A7R8G8B8 */ 550 ACCW(BPIXEL, 0x000000a5); 551 break; 552 case B_RGB32_LITTLE: 553 case B_RGBA32_LITTLE: 554 if (si->ps.card_arch < NV30A) 555 /* set depth 2: $c = A8R8G8B8 */ 556 ACCW(BPIXEL, 0x00000c00); 557 else 558 /* set depth 0-1: $7 = X8R8G8B8_Z8R8G8B8, $e = V8YB8U8YA8 */ 559 ACCW(BPIXEL, 0x000000e7); 560 break; 561 default: 562 LOG(8,("ACC: init, invalid bit depth\n")); 563 return B_ERROR; 564 } 565 } 566 567 if (si->ps.card_arch == NV04A) 568 { 569 /* do a explicit engine reset */ 570 ACCW(DEBUG0, 0x000001ff); 571 572 /* init some function blocks */ 573 /* DEBUG0, b20 and b21 should be high, this has a big influence on 574 * 3D rendering speed! (on all cards, confirmed) */ 575 ACCW(DEBUG0, 0x1230c000); 576 /* DEBUG1, b19 = 1 increases 3D rendering speed on TNT2 (M64) a bit, 577 * TNT1 rendering speed stays the same (all cards confirmed) */ 578 ACCW(DEBUG1, 0x72191101); 579 ACCW(DEBUG2, 0x11d5f071); 580 ACCW(DEBUG3, 0x0004ff31); 581 /* init OP methods */ 582 ACCW(DEBUG3, 0x4004ff31); 583 584 /* disable all acceleration engine INT reguests */ 585 ACCW(ACC_INTE, 0x00000000); 586 /* reset all acceration engine INT status bits */ 587 ACCW(ACC_INTS, 0xffffffff); 588 /* context control enabled */ 589 ACCW(NV04_CTX_CTRL, 0x10010100); 590 /* all acceleration buffers, pitches and colors are valid */ 591 ACCW(NV04_ACC_STAT, 0xffffffff); 592 /* enable acceleration engine command FIFO */ 593 ACCW(FIFO_EN, 0x00000001); 594 595 /* setup location of active screen in framebuffer */ 596 ACCW(OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 597 ACCW(OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 598 /* setup accesible card memory range */ 599 ACCW(BLIMIT0, (si->ps.memory_size - 1)); 600 ACCW(BLIMIT1, (si->ps.memory_size - 1)); 601 602 /* pattern shape value = 8x8, 2 color */ 603 //fixme: not needed, unless the engine has a hardware fault (setting via cmd)! 604 //ACCW(PAT_SHP, 0x00000000); 605 /* Pgraph Beta AND value (fraction) b23-30 */ 606 ACCW(BETA_AND_VAL, 0xffffffff); 607 } 608 else 609 { 610 /* do a explicit engine reset */ 611 ACCW(DEBUG0, 0xffffffff); 612 ACCW(DEBUG0, 0x00000000); 613 /* disable all acceleration engine INT reguests */ 614 ACCW(ACC_INTE, 0x00000000); 615 /* reset all acceration engine INT status bits */ 616 ACCW(ACC_INTS, 0xffffffff); 617 /* context control enabled */ 618 ACCW(NV10_CTX_CTRL, 0x10010100); 619 /* all acceleration buffers, pitches and colors are valid */ 620 ACCW(NV10_ACC_STAT, 0xffffffff); 621 /* enable acceleration engine command FIFO */ 622 ACCW(FIFO_EN, 0x00000001); 623 /* setup surface type: 624 * b1-0 = %01 = surface type is non-swizzle; 625 * this is needed to enable 3D on NV1x (confirmed) and maybe others? */ 626 ACCW(NV10_SURF_TYP, ((ACCR(NV10_SURF_TYP)) & 0x0007ff00)); 627 ACCW(NV10_SURF_TYP, ((ACCR(NV10_SURF_TYP)) | 0x00020101)); 628 } 629 630 if (si->ps.card_arch == NV10A) 631 { 632 /* init some function blocks */ 633 ACCW(DEBUG1, 0x00118700); 634 /* DEBUG2 has a big influence on 3D speed for NV11 and NV15 635 * (confirmed b3 and b18 should both be '1' on both cards!) 636 * (b16 should also be '1', increases 3D speed on NV11 a bit more) */ 637 ACCW(DEBUG2, 0x24fd2ad9); 638 ACCW(DEBUG3, 0x55de0030); 639 /* NV10_DEBUG4 has a big influence on 3D speed for NV11, NV15 and NV18 640 * (confirmed b14 and b15 should both be '1' on these cards!) 641 * (confirmed b8 should be '0' on NV18 to prevent complete engine crash!) */ 642 ACCW(NV10_DEBUG4, 0x0000c000); 643 644 /* copy tile setup stuff from 'source' to acc engine */ 645 for (cnt = 0; cnt < 32; cnt++) 646 { 647 NV_REG32(NVACC_NV10_TIL0AD + (cnt << 2)) = 648 NV_REG32(NVACC_NV10_FBTIL0AD + (cnt << 2)); 649 } 650 651 /* setup location of active screen in framebuffer */ 652 ACCW(OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 653 ACCW(OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 654 /* setup accesible card memory range */ 655 ACCW(BLIMIT0, (si->ps.memory_size - 1)); 656 ACCW(BLIMIT1, (si->ps.memory_size - 1)); 657 658 /* pattern shape value = 8x8, 2 color */ 659 //fixme: not needed, unless the engine has a hardware fault (setting via cmd)! 660 //ACCW(PAT_SHP, 0x00000000); 661 /* Pgraph Beta AND value (fraction) b23-30 */ 662 ACCW(BETA_AND_VAL, 0xffffffff); 663 } 664 665 if (si->ps.card_arch >= NV20A) 666 { 667 switch (si->ps.card_arch) 668 { 669 case NV40A: 670 /* init some function blocks */ 671 ACCW(DEBUG1, 0x401287c0); 672 ACCW(DEBUG3, 0x60de8051); 673 /* disable specific functions, but enable SETUP_SPARE2 register */ 674 ACCW(NV10_DEBUG4, 0x00008000); 675 /* set limit_viol_pix_adress(?): more likely something unknown.. */ 676 ACCW(NV25_WHAT0, 0x00be3c5f); 677 678 /* setup some unknown serially accessed registers (?) */ 679 tmp = (NV_REG32(NV32_NV4X_WHAT0) & 0x000000ff); 680 for (cnt = 0; (tmp && !(tmp & 0x00000001)); tmp >>= 1, cnt++) 681 { 682 ACCW(NV4X_WHAT2, cnt); 683 } 684 685 /* unknown.. */ 686 switch (si->ps.card_type) 687 { 688 case NV40: 689 case NV45: 690 /* and NV48: but these are pgm'd as NV45 currently */ 691 ACCW(NV40_WHAT0, 0x83280fff); 692 ACCW(NV40_WHAT1, 0x000000a0); 693 ACCW(NV40_WHAT2, 0x0078e366); 694 ACCW(NV40_WHAT3, 0x0000014c); 695 break; 696 case NV41: 697 /* and ID == 0x012x: but no cards defined yet */ 698 ACCW(NV40P_WHAT0, 0x83280eff); 699 ACCW(NV40P_WHAT1, 0x000000a0); 700 ACCW(NV40P_WHAT2, 0x007596ff); 701 ACCW(NV40P_WHAT3, 0x00000108); 702 break; 703 case NV43: 704 ACCW(NV40P_WHAT0, 0x83280eff); 705 ACCW(NV40P_WHAT1, 0x000000a0); 706 ACCW(NV40P_WHAT2, 0x0072cb77); 707 ACCW(NV40P_WHAT3, 0x00000108); 708 break; 709 case NV44: 710 case G72: 711 ACCW(NV40P_WHAT0, 0x83280eff); 712 ACCW(NV40P_WHAT1, 0x000000a0); 713 714 NV_REG32(NV32_NV44_WHAT10) = NV_REG32(NV32_NV10STRAPINFO); 715 NV_REG32(NV32_NV44_WHAT11) = 0x00000000; 716 NV_REG32(NV32_NV44_WHAT12) = 0x00000000; 717 NV_REG32(NV32_NV44_WHAT13) = NV_REG32(NV32_NV10STRAPINFO); 718 719 ACCW(NV44_WHAT2, 0x00000000); 720 ACCW(NV44_WHAT3, 0x00000000); 721 break; 722 /* case NV44 type 2: (cardID 0x022x) 723 //fixme if needed: doesn't seem to need the strapinfo thing.. 724 ACCW(NV40P_WHAT0, 0x83280eff); 725 ACCW(NV40P_WHAT1, 0x000000a0); 726 727 ACCW(NV44_WHAT2, 0x00000000); 728 ACCW(NV44_WHAT3, 0x00000000); 729 break; 730 */ case G70: 731 case G71: 732 case G73: 733 ACCW(NV40P_WHAT0, 0x83280eff); 734 ACCW(NV40P_WHAT1, 0x000000a0); 735 ACCW(NV40P_WHAT2, 0x07830610); 736 ACCW(NV40P_WHAT3, 0x0000016a); 737 break; 738 default: 739 ACCW(NV40P_WHAT0, 0x83280eff); 740 ACCW(NV40P_WHAT1, 0x000000a0); 741 break; 742 } 743 744 ACCW(NV10_TIL3PT, 0x2ffff800); 745 ACCW(NV10_TIL3ST, 0x00006000); 746 ACCW(NV4X_WHAT1, 0x01000000); 747 /* engine data source DMA instance = $1140 */ 748 ACCW(NV4X_DMA_SRC, 0x00001140); 749 break; 750 case NV30A: 751 /* init some function blocks, but most is unknown.. */ 752 ACCW(DEBUG1, 0x40108700); 753 ACCW(NV25_WHAT1, 0x00140000); 754 ACCW(DEBUG3, 0xf00e0431); 755 ACCW(NV10_DEBUG4, 0x00008000); 756 ACCW(NV25_WHAT0, 0xf04b1f36); 757 ACCW(NV20_WHAT3, 0x1002d888); 758 ACCW(NV25_WHAT2, 0x62ff007f); 759 break; 760 case NV20A: 761 /* init some function blocks, but most is unknown.. */ 762 ACCW(DEBUG1, 0x00118700); 763 ACCW(DEBUG3, 0xf20e0431); 764 ACCW(NV10_DEBUG4, 0x00000000); 765 ACCW(NV20_WHAT1, 0x00000040); 766 if (si->ps.card_type < NV25) 767 { 768 ACCW(NV20_WHAT2, 0x00080000); 769 ACCW(NV10_DEBUG5, 0x00000005); 770 ACCW(NV20_WHAT3, 0x45caa208); 771 ACCW(NV20_WHAT4, 0x24000000); 772 ACCW(NV20_WHAT5, 0x00000040); 773 774 /* copy some fixed RAM(?) configuration info(?) to some indexed registers: */ 775 /* b16-24 is select; b2-13 is adress in 32-bit words */ 776 ACCW(RDI_INDEX, 0x00e00038); 777 /* data is 32-bit */ 778 ACCW(RDI_DATA, 0x00000030); 779 /* copy some fixed RAM(?) configuration info(?) to some indexed registers: */ 780 /* b16-24 is select; b2-13 is adress in 32-bit words */ 781 ACCW(RDI_INDEX, 0x00e10038); 782 /* data is 32-bit */ 783 ACCW(RDI_DATA, 0x00000030); 784 } 785 else 786 { 787 ACCW(NV25_WHAT1, 0x00080000); 788 ACCW(NV25_WHAT0, 0x304b1fb6); 789 ACCW(NV20_WHAT3, 0x18b82880); 790 ACCW(NV20_WHAT4, 0x44000000); 791 ACCW(NV20_WHAT5, 0x40000080); 792 ACCW(NV25_WHAT2, 0x000000ff); 793 } 794 break; 795 } 796 797 /* NV20A, NV30A and NV40A: */ 798 /* copy tile setup stuff from previous setup 'source' to acc engine 799 * (pattern colorRAM?) */ 800 if ((si->ps.card_type <= NV40) || (si->ps.card_type == NV45)) 801 { 802 for (cnt = 0; cnt < 32; cnt++) 803 { 804 /* copy NV10_FBTIL0AD upto/including NV10_FBTIL7ST */ 805 NV_REG32(NVACC_NV20_WHAT0 + (cnt << 2)) = 806 NV_REG32(NVACC_NV10_FBTIL0AD + (cnt << 2)); 807 808 /* copy NV10_FBTIL0AD upto/including NV10_FBTIL7ST */ 809 NV_REG32(NVACC_NV20_2_WHAT0 + (cnt << 2)) = 810 NV_REG32(NVACC_NV10_FBTIL0AD + (cnt << 2)); 811 } 812 } 813 else 814 { 815 /* NV41, 43, 44, G70 and later */ 816 if (si->ps.card_type >= G70) 817 { 818 for (cnt = 0; cnt < 60; cnt++) 819 { 820 /* copy NV41_FBTIL0AD upto/including G70_FBTILEST */ 821 NV_REG32(NVACC_NV41_WHAT0 + (cnt << 2)) = 822 NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2)); 823 824 /* copy NV41_FBTIL0AD upto/including G70_FBTILEST */ 825 NV_REG32(NVACC_NV20_2_WHAT0 + (cnt << 2)) = 826 NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2)); 827 } 828 } 829 else 830 { 831 /* NV41, 43, 44 */ 832 for (cnt = 0; cnt < 48; cnt++) 833 { 834 /* copy NV41_FBTIL0AD upto/including NV41_FBTILBST */ 835 NV_REG32(NVACC_NV20_WHAT0 + (cnt << 2)) = 836 NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2)); 837 838 if (si->ps.card_type != NV44) 839 { 840 /* copy NV41_FBTIL0AD upto/including NV41_FBTILBST */ 841 NV_REG32(NVACC_NV20_2_WHAT0 + (cnt << 2)) = 842 NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2)); 843 } 844 } 845 } 846 } 847 848 if (si->ps.card_arch >= NV40A) 849 { 850 if ((si->ps.card_type == NV40) || (si->ps.card_type == NV45)) 851 { 852 /* copy some RAM configuration info(?) */ 853 ACCW(NV20_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); 854 ACCW(NV20_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); 855 ACCW(NV40_WHAT_T2, NV_REG32(NV32_PFB_CONFIG_0)); 856 ACCW(NV40_WHAT_T3, NV_REG32(NV32_PFB_CONFIG_1)); 857 858 /* setup location of active screen in framebuffer */ 859 ACCW(NV20_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 860 ACCW(NV20_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 861 /* setup accesible card memory range */ 862 ACCW(NV20_BLIMIT6, (si->ps.memory_size - 1)); 863 ACCW(NV20_BLIMIT7, (si->ps.memory_size - 1)); 864 } 865 else 866 { 867 /* NV41, 43, 44, G70 and later */ 868 869 /* copy some RAM configuration info(?) */ 870 if (si->ps.card_type >= G70) 871 { 872 ACCW(G70_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); 873 ACCW(G70_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); 874 } 875 else 876 { 877 /* NV41, 43, 44 */ 878 ACCW(NV40P_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); 879 ACCW(NV40P_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); 880 } 881 ACCW(NV40P_WHAT_T2, NV_REG32(NV32_PFB_CONFIG_0)); 882 ACCW(NV40P_WHAT_T3, NV_REG32(NV32_PFB_CONFIG_1)); 883 884 /* setup location of active screen in framebuffer */ 885 ACCW(NV40P_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 886 ACCW(NV40P_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 887 /* setup accesible card memory range */ 888 ACCW(NV40P_BLIMIT6, (si->ps.memory_size - 1)); 889 ACCW(NV40P_BLIMIT7, (si->ps.memory_size - 1)); 890 } 891 } 892 else /* NV20A and NV30A: */ 893 { 894 /* copy some RAM configuration info(?) */ 895 ACCW(NV20_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); 896 ACCW(NV20_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); 897 /* copy some RAM configuration info(?) to some indexed registers: */ 898 /* b16-24 is select; b2-13 is adress in 32-bit words */ 899 ACCW(RDI_INDEX, 0x00ea0000); 900 /* data is 32-bit */ 901 ACCW(RDI_DATA, NV_REG32(NV32_PFB_CONFIG_0)); 902 /* b16-24 is select; b2-13 is adress in 32-bit words */ 903 ACCW(RDI_INDEX, 0x00ea0004); 904 /* data is 32-bit */ 905 ACCW(RDI_DATA, NV_REG32(NV32_PFB_CONFIG_1)); 906 907 /* setup location of active screen in framebuffer */ 908 ACCW(NV20_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 909 ACCW(NV20_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 910 /* setup accesible card memory range */ 911 ACCW(NV20_BLIMIT6, (si->ps.memory_size - 1)); 912 ACCW(NV20_BLIMIT7, (si->ps.memory_size - 1)); 913 } 914 915 /* NV20A, NV30A and NV40A: */ 916 /* setup some acc engine tile stuff */ 917 ACCW(NV10_TIL2AD, 0x00000000); 918 ACCW(NV10_TIL0ED, 0xffffffff); 919 } 920 921 /* all cards: */ 922 /* setup clipping: rect size is 32768 x 32768, probably max. setting */ 923 /* note: 924 * can also be done via the NV_IMAGE_BLACK_RECTANGLE engine command. */ 925 ACCW(ABS_UCLP_XMIN, 0x00000000); 926 ACCW(ABS_UCLP_YMIN, 0x00000000); 927 ACCW(ABS_UCLP_XMAX, 0x00007fff); 928 ACCW(ABS_UCLP_YMAX, 0x00007fff); 929 930 /* setup sync parameters for NV12_IMAGE_BLIT command for the current mode: 931 * values given are CRTC vertical counter limit values. The NV12 command will wait 932 * for the specified's CRTC's vertical counter to be in between the given values */ 933 if (si->ps.card_type >= NV11) 934 { 935 ACCW(NV11_CRTC_LO, si->dm.timing.v_display - 1); 936 ACCW(NV11_CRTC_HI, si->dm.timing.v_display + 1); 937 } 938 939 /*** PFIFO ***/ 940 /* (setup caches) */ 941 /* disable caches reassign */ 942 ACCW(PF_CACHES, 0x00000000); 943 /* PFIFO mode: channel 0 is in DMA mode, channels 1 - 32 are in PIO mode */ 944 ACCW(PF_MODE, 0x00000001); 945 /* cache1 push0 access disabled */ 946 ACCW(PF_CACH1_PSH0, 0x00000000); 947 /* cache1 pull0 access disabled */ 948 ACCW(PF_CACH1_PUL0, 0x00000000); 949 /* cache1 push1 mode = DMA */ 950 if (si->ps.card_arch >= NV40A) 951 ACCW(PF_CACH1_PSH1, 0x00010000); 952 else 953 ACCW(PF_CACH1_PSH1, 0x00000100); 954 /* cache1 DMA Put offset = 0 (b2-28) */ 955 ACCW(PF_CACH1_DMAP, 0x00000000); 956 /* cache1 DMA Get offset = 0 (b2-28) */ 957 ACCW(PF_CACH1_DMAG, 0x00000000); 958 /* cache1 DMA instance adress = $114e (b0-15); 959 * instance being b4-19 with baseadress NV_PRAMIN_CTX_0 (0x00700000). */ 960 /* note: 961 * should point to a DMA definition in CTX register space (which is sort of RAM). 962 * This define tells the engine where the DMA cmd buffer is and what it's size is. 963 * Inside that cmd buffer you'll find the actual issued engine commands. */ 964 if (si->ps.card_arch >= NV40A) 965 ACCW(PF_CACH1_DMAI, 0x00001150); 966 else 967 //2007 3d test.. 968 ACCW(PF_CACH1_DMAI, 0x0000114e); 969 /* cache0 push0 access disabled */ 970 ACCW(PF_CACH0_PSH0, 0x00000000); 971 /* cache0 pull0 access disabled */ 972 ACCW(PF_CACH0_PUL0, 0x00000000); 973 /* RAM HT (hash table) baseadress = $10000 (b4-8), size = 4k, 974 * search = 128 (is byte offset between hash 'sets') */ 975 /* note: 976 * so HT base is $00710000, last is $00710fff. 977 * In this space you define the engine command handles (HT_HANDL_XX), which 978 * in turn points to the defines in CTX register space (which is sort of RAM) */ 979 ACCW(PF_RAMHT, 0x03000100); 980 /* RAM FC baseadress = $11000 (b3-8) (size is fixed to 0.5k(?)) */ 981 /* note: 982 * so FC base is $00711000, last is $007111ff. (not used?) */ 983 ACCW(PF_RAMFC, 0x00000110); 984 /* RAM RO baseadress = $11200 (b1-8), size = 0.5k */ 985 /* note: 986 * so RO base is $00711200, last is $007113ff. (not used?) */ 987 /* note also: 988 * This means(?) the PRAMIN CTX registers are accessible from base $00711400. */ 989 ACCW(PF_RAMRO, 0x00000112); 990 /* PFIFO size: ch0-15 = 512 bytes, ch16-31 = 124 bytes */ 991 ACCW(PF_SIZE, 0x0000ffff); 992 /* cache1 hash instance = $ffff (b0-15) */ 993 ACCW(PF_CACH1_HASH, 0x0000ffff); 994 /* disable all PFIFO INTs */ 995 ACCW(PF_INTEN, 0x00000000); 996 /* reset all PFIFO INT status bits */ 997 ACCW(PF_INTSTAT, 0xffffffff); 998 /* cache0 pull0 engine = acceleration engine (graphics) */ 999 ACCW(PF_CACH0_PUL1, 0x00000001); 1000 /* cache1 DMA control: disable some stuff */ 1001 ACCW(PF_CACH1_DMAC, 0x00000000); 1002 /* cache1 engine 0 upto/including 7 is software (could also be graphics or DVD) */ 1003 ACCW(PF_CACH1_ENG, 0x00000000); 1004 /* cache1 DMA fetch: trigger at 128 bytes, size is 32 bytes, max requests is 15, 1005 * use little endian */ 1006 ACCW(PF_CACH1_DMAF, 0x000f0078); 1007 /* cache1 DMA push: b0 = 1: access is enabled */ 1008 ACCW(PF_CACH1_DMAS, 0x00000001); 1009 /* cache1 push0 access enabled */ 1010 ACCW(PF_CACH1_PSH0, 0x00000001); 1011 /* cache1 pull0 access enabled */ 1012 ACCW(PF_CACH1_PUL0, 0x00000001); 1013 /* cache1 pull1 engine = acceleration engine (graphics) */ 1014 ACCW(PF_CACH1_PUL1, 0x00000001); 1015 /* enable PFIFO caches reassign */ 1016 ACCW(PF_CACHES, 0x00000001); 1017 1018 /* setup 3D specifics */ 1019 nv_init_for_3D_dma(); 1020 1021 /*** init acceleration engine command info ***/ 1022 /* set object handles */ 1023 /* note: 1024 * probably depending on some other setup, there are 8 or 32 FIFO channels 1025 * available. Assuming the current setup only has 8 channels because the 'rest' 1026 * isn't setup here... */ 1027 si->engine.fifo.handle[0] = NV_ROP5_SOLID; 1028 si->engine.fifo.handle[1] = NV_IMAGE_BLACK_RECTANGLE; 1029 si->engine.fifo.handle[2] = NV_IMAGE_PATTERN; 1030 si->engine.fifo.handle[3] = NV4_SURFACE; /* NV10_CONTEXT_SURFACES_2D is identical */ 1031 si->engine.fifo.handle[4] = NV_IMAGE_BLIT; 1032 si->engine.fifo.handle[5] = NV4_GDI_RECTANGLE_TEXT; 1033 si->engine.fifo.handle[6] = NV4_CONTEXT_SURFACES_ARGB_ZS;//NV1_RENDER_SOLID_LIN; 1034 si->engine.fifo.handle[7] = NV4_DX5_TEXTURE_TRIANGLE; 1035 /* preset no FIFO channels assigned to cmd's */ 1036 for (cnt = 0; cnt < 0x20; cnt++) 1037 { 1038 si->engine.fifo.ch_ptr[cnt] = 0; 1039 } 1040 /* set handle's pointers to their assigned FIFO channels */ 1041 /* note: 1042 * b0-1 aren't used as adressbits. Using b0 to indicate a valid pointer. */ 1043 for (cnt = 0; cnt < 0x08; cnt++) 1044 { 1045 si->engine.fifo.ch_ptr[(si->engine.fifo.handle[cnt])] 1046 = (0x00000001 + (cnt * 0x00002000)); 1047 } 1048 1049 /*** init DMA command buffer info ***/ 1050 if (si->ps.card_arch >= NV40A) //main mem DMA buf on pre-NV40 1051 { 1052 si->dma_buffer = (void *)((char *)si->framebuffer 1053 + ((si->ps.memory_size - 1) & 0xffff8000)); 1054 } 1055 LOG(4, ("ACC_DMA: command buffer is at adress $%p\n", si->dma_buffer)); 1056 /* we have issued no DMA cmd's to the engine yet */ 1057 si->engine.dma.put = 0; 1058 /* the current first free adress in the DMA buffer is at offset 0 */ 1059 si->engine.dma.current = 0; 1060 /* the DMA buffer can hold 8k 32-bit words (it's 32kb in size), 1061 * or 256k 32-bit words (1Mb in size) dependant on architecture (for now) */ 1062 /* note: 1063 * one word is reserved at the end of the DMA buffer to be able to instruct the 1064 * engine to do a buffer wrap-around! 1065 * (DMA opcode 'noninc method': issue word $20000000.) */ 1066 if (si->ps.card_arch < NV40A) 1067 si->engine.dma.max = ((1 * 1024 * 1024) >> 2) - 1; 1068 else 1069 si->engine.dma.max = 8192 - 1; 1070 /* note the current free space we have left in the DMA buffer */ 1071 si->engine.dma.free = si->engine.dma.max - si->engine.dma.current; 1072 1073 /*** init FIFO via DMA command buffer. ***/ 1074 /* wait for room in fifo for new FIFO assigment cmds if needed: */ 1075 if (si->ps.card_arch >= NV40A) 1076 { 1077 if (nv_acc_fifofree_dma(12) != B_OK) return B_ERROR; 1078 } else { 1079 if (nv_acc_fifofree_dma(16) != B_OK) return B_ERROR; 1080 } 1081 1082 /* program new FIFO assignments */ 1083 /* Raster OPeration: */ 1084 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH0, si->engine.fifo.handle[0]); 1085 /* Clip: */ 1086 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH1, si->engine.fifo.handle[1]); 1087 /* Pattern: */ 1088 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH2, si->engine.fifo.handle[2]); 1089 /* 2D Surfaces: */ 1090 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH3, si->engine.fifo.handle[3]); 1091 /* Blit: */ 1092 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH4, si->engine.fifo.handle[4]); 1093 /* Bitmap: */ 1094 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH5, si->engine.fifo.handle[5]); 1095 if (si->ps.card_arch < NV40A) 1096 { 1097 /* 3D surfaces: (3D related only) */ 1098 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH6, si->engine.fifo.handle[6]); 1099 /* Textured Triangle: (3D only) */ 1100 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH7, si->engine.fifo.handle[7]); 1101 } 1102 1103 /*** Set pixel width ***/ 1104 switch(si->dm.space) 1105 { 1106 case B_CMAP8: 1107 surf_depth = 0x00000001; 1108 cmd_depth = 0x00000003; 1109 break; 1110 case B_RGB15_LITTLE: 1111 case B_RGB16_LITTLE: 1112 surf_depth = 0x00000004; 1113 cmd_depth = 0x00000001; 1114 break; 1115 case B_RGB32_LITTLE: 1116 case B_RGBA32_LITTLE: 1117 surf_depth = 0x00000006; 1118 cmd_depth = 0x00000003; 1119 break; 1120 default: 1121 LOG(8,("ACC_DMA: init, invalid bit depth\n")); 1122 return B_ERROR; 1123 } 1124 1125 /* wait for room in fifo for surface setup cmd if needed */ 1126 if (nv_acc_fifofree_dma(5) != B_OK) return B_ERROR; 1127 /* now setup 2D surface (writing 5 32bit words) */ 1128 nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 4); 1129 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = surf_depth; /* Format */ 1130 /* setup screen pitch */ 1131 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1132 ((si->fbc.bytes_per_row & 0x0000ffff) | (si->fbc.bytes_per_row << 16)); /* Pitch */ 1133 /* setup screen location */ 1134 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1135 ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer); /* OffsetSource */ 1136 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1137 ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer); /* OffsetDest */ 1138 1139 /* wait for room in fifo for pattern colordepth setup cmd if needed */ 1140 if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR; 1141 /* set pattern colordepth (writing 2 32bit words) */ 1142 nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETCOLORFORMAT, 1); 1143 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 1144 1145 /* wait for room in fifo for bitmap colordepth setup cmd if needed */ 1146 if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR; 1147 /* set bitmap colordepth (writing 2 32bit words) */ 1148 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_SETCOLORFORMAT, 1); 1149 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 1150 1151 /* Load our pattern into the engine: */ 1152 /* wait for room in fifo for pattern cmd if needed. */ 1153 if (nv_acc_fifofree_dma(7) != B_OK) return B_ERROR; 1154 /* now setup pattern (writing 7 32bit words) */ 1155 nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETSHAPE, 1); 1156 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000000; /* SetShape: 0 = 8x8, 1 = 64x1, 2 = 1x64 */ 1157 nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETCOLOR0, 4); 1158 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetColor0 */ 1159 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetColor1 */ 1160 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetPattern[0] */ 1161 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetPattern[1] */ 1162 1163 /* tell the engine to fetch and execute all (new) commands in the DMA buffer */ 1164 nv_start_dma(); 1165 1166 return B_OK; 1167 } 1168 1169 static void nv_init_for_3D_dma(void) 1170 { 1171 /* setup PGRAPH unknown registers and modify (pre-cleared) pipe stuff for 3D use */ 1172 if (si->ps.card_arch >= NV10A) 1173 { 1174 /* setup unknown PGRAPH stuff */ 1175 ACCW(PGWHAT_00, 0x00000000); 1176 ACCW(PGWHAT_01, 0x00000000); 1177 ACCW(PGWHAT_02, 0x00000000); 1178 ACCW(PGWHAT_03, 0x00000000); 1179 1180 ACCW(PGWHAT_04, 0x00001000); 1181 ACCW(PGWHAT_05, 0x00001000); 1182 ACCW(PGWHAT_06, 0x4003ff80); 1183 1184 ACCW(PGWHAT_07, 0x00000000); 1185 ACCW(PGWHAT_08, 0x00000000); 1186 ACCW(PGWHAT_09, 0x00000000); 1187 ACCW(PGWHAT_0A, 0x00000000); 1188 ACCW(PGWHAT_0B, 0x00000000); 1189 1190 ACCW(PGWHAT_0C, 0x00080008); 1191 ACCW(PGWHAT_0D, 0x00080008); 1192 1193 ACCW(PGWHAT_0E, 0x00000000); 1194 ACCW(PGWHAT_0F, 0x00000000); 1195 ACCW(PGWHAT_10, 0x00000000); 1196 ACCW(PGWHAT_11, 0x00000000); 1197 ACCW(PGWHAT_12, 0x00000000); 1198 ACCW(PGWHAT_13, 0x00000000); 1199 ACCW(PGWHAT_14, 0x00000000); 1200 ACCW(PGWHAT_15, 0x00000000); 1201 ACCW(PGWHAT_16, 0x00000000); 1202 ACCW(PGWHAT_17, 0x00000000); 1203 ACCW(PGWHAT_18, 0x00000000); 1204 1205 ACCW(PGWHAT_19, 0x10000000); 1206 1207 ACCW(PGWHAT_1A, 0x00000000); 1208 ACCW(PGWHAT_1B, 0x00000000); 1209 ACCW(PGWHAT_1C, 0x00000000); 1210 ACCW(PGWHAT_1D, 0x00000000); 1211 ACCW(PGWHAT_1E, 0x00000000); 1212 ACCW(PGWHAT_1F, 0x00000000); 1213 ACCW(PGWHAT_20, 0x00000000); 1214 ACCW(PGWHAT_21, 0x00000000); 1215 1216 ACCW(PGWHAT_22, 0x08000000); 1217 1218 ACCW(PGWHAT_23, 0x00000000); 1219 ACCW(PGWHAT_24, 0x00000000); 1220 ACCW(PGWHAT_25, 0x00000000); 1221 ACCW(PGWHAT_26, 0x00000000); 1222 1223 ACCW(PGWHAT_27, 0x4b7fffff); 1224 1225 ACCW(PGWHAT_28, 0x00000000); 1226 ACCW(PGWHAT_29, 0x00000000); 1227 ACCW(PGWHAT_2A, 0x00000000); 1228 1229 /* setup window clipping */ 1230 /* b0-11 = min; b16-27 = max. 1231 * note: 1232 * probably two's complement values, so setting to max range here: 1233 * which would be -2048 upto/including +2047. */ 1234 /* horizontal */ 1235 ACCW(WINCLIP_H_0, 0x07ff0800); 1236 ACCW(WINCLIP_H_1, 0x07ff0800); 1237 ACCW(WINCLIP_H_2, 0x07ff0800); 1238 ACCW(WINCLIP_H_3, 0x07ff0800); 1239 ACCW(WINCLIP_H_4, 0x07ff0800); 1240 ACCW(WINCLIP_H_5, 0x07ff0800); 1241 ACCW(WINCLIP_H_6, 0x07ff0800); 1242 ACCW(WINCLIP_H_7, 0x07ff0800); 1243 /* vertical */ 1244 ACCW(WINCLIP_V_0, 0x07ff0800); 1245 ACCW(WINCLIP_V_1, 0x07ff0800); 1246 ACCW(WINCLIP_V_2, 0x07ff0800); 1247 ACCW(WINCLIP_V_3, 0x07ff0800); 1248 ACCW(WINCLIP_V_4, 0x07ff0800); 1249 ACCW(WINCLIP_V_5, 0x07ff0800); 1250 ACCW(WINCLIP_V_6, 0x07ff0800); 1251 ACCW(WINCLIP_V_7, 0x07ff0800); 1252 1253 /* setup (initialize) pipe: 1254 * needed to get valid 3D rendering on (at least) NV1x cards. Without this 1255 * those cards produce rubbish instead of 3D, although the engine itself keeps 1256 * running and 2D stays OK. */ 1257 1258 /* set eyetype to local, lightning etc. is off */ 1259 ACCW(NV10_XFMOD0, 0x10000000); 1260 /* disable all lights */ 1261 ACCW(NV10_XFMOD1, 0x00000000); 1262 1263 /* note: upon writing data into the PIPEDAT register, the PIPEADR is 1264 * probably auto-incremented! */ 1265 /* (pipe adress = b2-16, pipe data = b0-31) */ 1266 /* note: pipe adresses IGRAPH registers! */ 1267 ACCW(NV10_PIPEADR, 0x00006740); 1268 ACCW(NV10_PIPEDAT, 0x00000000); 1269 ACCW(NV10_PIPEDAT, 0x00000000); 1270 ACCW(NV10_PIPEDAT, 0x00000000); 1271 ACCW(NV10_PIPEDAT, 0x3f800000); 1272 1273 ACCW(NV10_PIPEADR, 0x00006750); 1274 ACCW(NV10_PIPEDAT, 0x40000000); 1275 ACCW(NV10_PIPEDAT, 0x40000000); 1276 ACCW(NV10_PIPEDAT, 0x40000000); 1277 ACCW(NV10_PIPEDAT, 0x40000000); 1278 1279 ACCW(NV10_PIPEADR, 0x00006760); 1280 ACCW(NV10_PIPEDAT, 0x00000000); 1281 ACCW(NV10_PIPEDAT, 0x00000000); 1282 ACCW(NV10_PIPEDAT, 0x3f800000); 1283 ACCW(NV10_PIPEDAT, 0x00000000); 1284 1285 ACCW(NV10_PIPEADR, 0x00006770); 1286 ACCW(NV10_PIPEDAT, 0xc5000000); 1287 ACCW(NV10_PIPEDAT, 0xc5000000); 1288 ACCW(NV10_PIPEDAT, 0x00000000); 1289 ACCW(NV10_PIPEDAT, 0x00000000); 1290 1291 ACCW(NV10_PIPEADR, 0x00006780); 1292 ACCW(NV10_PIPEDAT, 0x00000000); 1293 ACCW(NV10_PIPEDAT, 0x00000000); 1294 ACCW(NV10_PIPEDAT, 0x3f800000); 1295 ACCW(NV10_PIPEDAT, 0x00000000); 1296 1297 ACCW(NV10_PIPEADR, 0x000067a0); 1298 ACCW(NV10_PIPEDAT, 0x3f800000); 1299 ACCW(NV10_PIPEDAT, 0x3f800000); 1300 ACCW(NV10_PIPEDAT, 0x3f800000); 1301 ACCW(NV10_PIPEDAT, 0x3f800000); 1302 1303 ACCW(NV10_PIPEADR, 0x00006ab0); 1304 ACCW(NV10_PIPEDAT, 0x3f800000); 1305 ACCW(NV10_PIPEDAT, 0x3f800000); 1306 ACCW(NV10_PIPEDAT, 0x3f800000); 1307 1308 ACCW(NV10_PIPEADR, 0x00006ac0); 1309 ACCW(NV10_PIPEDAT, 0x00000000); 1310 ACCW(NV10_PIPEDAT, 0x00000000); 1311 ACCW(NV10_PIPEDAT, 0x00000000); 1312 1313 ACCW(NV10_PIPEADR, 0x00006c10); 1314 ACCW(NV10_PIPEDAT, 0xbf800000); 1315 1316 ACCW(NV10_PIPEADR, 0x00007030); 1317 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1318 1319 ACCW(NV10_PIPEADR, 0x00007040); 1320 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1321 1322 ACCW(NV10_PIPEADR, 0x00007050); 1323 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1324 1325 ACCW(NV10_PIPEADR, 0x00007060); 1326 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1327 1328 ACCW(NV10_PIPEADR, 0x00007070); 1329 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1330 1331 ACCW(NV10_PIPEADR, 0x00007080); 1332 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1333 1334 ACCW(NV10_PIPEADR, 0x00007090); 1335 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1336 1337 ACCW(NV10_PIPEADR, 0x000070a0); 1338 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1339 1340 ACCW(NV10_PIPEADR, 0x00006a80); 1341 ACCW(NV10_PIPEDAT, 0x00000000); 1342 ACCW(NV10_PIPEDAT, 0x00000000); 1343 ACCW(NV10_PIPEDAT, 0x3f800000); 1344 1345 ACCW(NV10_PIPEADR, 0x00006aa0); 1346 ACCW(NV10_PIPEDAT, 0x00000000); 1347 ACCW(NV10_PIPEDAT, 0x00000000); 1348 ACCW(NV10_PIPEDAT, 0x00000000); 1349 1350 /* select primitive type that will be drawn (tri's) */ 1351 ACCW(NV10_PIPEADR, 0x00000040); 1352 ACCW(NV10_PIPEDAT, 0x00000005); 1353 1354 ACCW(NV10_PIPEADR, 0x00006400); 1355 ACCW(NV10_PIPEDAT, 0x3f800000); 1356 ACCW(NV10_PIPEDAT, 0x3f800000); 1357 ACCW(NV10_PIPEDAT, 0x4b7fffff); 1358 ACCW(NV10_PIPEDAT, 0x00000000); 1359 1360 ACCW(NV10_PIPEADR, 0x00006410); 1361 ACCW(NV10_PIPEDAT, 0xc5000000); 1362 ACCW(NV10_PIPEDAT, 0xc5000000); 1363 ACCW(NV10_PIPEDAT, 0x00000000); 1364 ACCW(NV10_PIPEDAT, 0x00000000); 1365 1366 ACCW(NV10_PIPEADR, 0x00006420); 1367 ACCW(NV10_PIPEDAT, 0x00000000); 1368 ACCW(NV10_PIPEDAT, 0x00000000); 1369 ACCW(NV10_PIPEDAT, 0x00000000); 1370 ACCW(NV10_PIPEDAT, 0x00000000); 1371 1372 ACCW(NV10_PIPEADR, 0x00006430); 1373 ACCW(NV10_PIPEDAT, 0x00000000); 1374 ACCW(NV10_PIPEDAT, 0x00000000); 1375 ACCW(NV10_PIPEDAT, 0x00000000); 1376 ACCW(NV10_PIPEDAT, 0x00000000); 1377 1378 ACCW(NV10_PIPEADR, 0x000064c0); 1379 ACCW(NV10_PIPEDAT, 0x3f800000); 1380 ACCW(NV10_PIPEDAT, 0x3f800000); 1381 ACCW(NV10_PIPEDAT, 0x477fffff); 1382 ACCW(NV10_PIPEDAT, 0x3f800000); 1383 1384 ACCW(NV10_PIPEADR, 0x000064d0); 1385 ACCW(NV10_PIPEDAT, 0xc5000000); 1386 ACCW(NV10_PIPEDAT, 0xc5000000); 1387 ACCW(NV10_PIPEDAT, 0x00000000); 1388 ACCW(NV10_PIPEDAT, 0x00000000); 1389 1390 ACCW(NV10_PIPEADR, 0x000064e0); 1391 ACCW(NV10_PIPEDAT, 0xc4fff000); 1392 ACCW(NV10_PIPEDAT, 0xc4fff000); 1393 ACCW(NV10_PIPEDAT, 0x00000000); 1394 ACCW(NV10_PIPEDAT, 0x00000000); 1395 1396 ACCW(NV10_PIPEADR, 0x000064f0); 1397 ACCW(NV10_PIPEDAT, 0x00000000); 1398 ACCW(NV10_PIPEDAT, 0x00000000); 1399 ACCW(NV10_PIPEDAT, 0x00000000); 1400 ACCW(NV10_PIPEDAT, 0x00000000); 1401 1402 /* turn lightning on */ 1403 ACCW(NV10_XFMOD0, 0x30000000); 1404 /* set light 1 to infinite type, other lights remain off */ 1405 ACCW(NV10_XFMOD1, 0x00000004); 1406 1407 /* Z-buffer state is: 1408 * initialized, set to: 'fixed point' (integer?); Z-buffer; 16bits depth */ 1409 /* note: 1410 * other options possible are: floating point; 24bits depth; W-buffer */ 1411 ACCW(GLOB_STAT_0, 0x10000000); 1412 /* set DMA instance 2 and 3 to be invalid */ 1413 ACCW(GLOB_STAT_1, 0x00000000); 1414 } 1415 } 1416 1417 static void nv_start_dma(void) 1418 { 1419 uint32 dummy; 1420 1421 if (si->engine.dma.current != si->engine.dma.put) 1422 { 1423 si->engine.dma.put = si->engine.dma.current; 1424 /* flush used caches so we know for sure the DMA cmd buffer received all data. */ 1425 if (si->ps.card_arch < NV40A) 1426 { 1427 /* some CPU's support out-of-order processing (WinChip/Cyrix). Flush them. */ 1428 __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory"); 1429 /* read a non-cached adress to flush the cash */ 1430 dummy = ACCR(STATUS); 1431 } 1432 else 1433 { 1434 /* dummy read the first adress of the framebuffer to flush MTRR-WC buffers */ 1435 dummy = *((volatile uint32 *)(si->framebuffer)); 1436 } 1437 1438 /* actually start DMA to execute all commands now in buffer */ 1439 /* note: 1440 * it doesn't matter which FIFO channel's DMA registers we access, they are in 1441 * fact all the same set. It also doesn't matter if the channel was assigned a 1442 * command or not. */ 1443 /* note also: 1444 * NV_GENERAL_DMAPUT is a write-only register on some cards (confirmed NV11). */ 1445 NV_REG32(NVACC_FIFO + NV_GENERAL_DMAPUT) = (si->engine.dma.put << 2); 1446 } 1447 } 1448 1449 /* this routine does not check the engine's internal hardware FIFO, but the DMA 1450 * command buffer. You can see this as a FIFO as well, that feeds the hardware FIFO. 1451 * The hardware FIFO state is checked by the DMA hardware automatically. */ 1452 static status_t nv_acc_fifofree_dma(uint16 cmd_size) 1453 { 1454 uint32 dmaget; 1455 1456 /* we'd better check for timeouts on the DMA engine as it's theoretically 1457 * breakable by malfunctioning software */ 1458 uint16 cnt = 0; 1459 1460 /* check if the DMA buffer has enough room for the command. 1461 * note: 1462 * engine.dma.free is 'cached' */ 1463 while ((si->engine.dma.free < cmd_size) && (cnt < 10000) && (err < 3)) 1464 { 1465 /* see where the engine is currently fetching from the buffer */ 1466 /* note: 1467 * read this only once in the code as accessing registers is relatively slow */ 1468 /* note also: 1469 * it doesn't matter which FIFO channel's DMA registers we access, they are in 1470 * fact all the same set. It also doesn't matter if the channel was assigned a 1471 * command or not. */ 1472 dmaget = ((NV_REG32(NVACC_FIFO + NV_GENERAL_DMAGET)) >> 2); 1473 1474 /* update timeout counter: on NV11 on a Pentium4 2.8Ghz max reached count 1475 * using BeRoMeter 1.2.6 was about 600; so counting 10000 before generating 1476 * a timeout should definately do it. Snooze()-ing cannot be done without a 1477 * serious speed penalty, even if done for only 1 microSecond. */ 1478 cnt++; 1479 1480 /* where's the engine fetching viewed from us issuing? */ 1481 if (si->engine.dma.put >= dmaget) 1482 { 1483 /* engine is fetching 'behind us', the last piece of the buffer is free */ 1484 1485 /* note the 'updated' free space we have in the DMA buffer */ 1486 si->engine.dma.free = si->engine.dma.max - si->engine.dma.current; 1487 /* if it's enough after all we exit this routine immediately. Else: */ 1488 if (si->engine.dma.free < cmd_size) 1489 { 1490 /* not enough room left, so instruct DMA engine to reset the buffer 1491 * when it's reaching the end of it */ 1492 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x20000000; 1493 /* reset our buffer pointer, so new commands will be placed at the 1494 * beginning of the buffer. */ 1495 si->engine.dma.current = 0; 1496 /* tell the engine to fetch the remaining command(s) in the DMA buffer 1497 * that where not executed before. */ 1498 nv_start_dma(); 1499 1500 /* NOW the engine is fetching 'in front of us', so the first piece 1501 * of the buffer is free */ 1502 1503 /* note the updated current free space we have in the DMA buffer */ 1504 si->engine.dma.free = dmaget - si->engine.dma.current; 1505 /* mind this pittfall: 1506 * Leave some room between where the engine is fetching and where we 1507 * put new commands. Otherwise the engine will crash on heavy loads. 1508 * A crash can be forced best in 640x480x32 mode with BeRoMeter 1.2.6. 1509 * (confirmed on NV11 and NV43 with less than 256 words forced freespace.) 1510 * Note: 1511 * The engine is DMA triggered for fetching chunks every 128 bytes, 1512 * maybe this is the reason for this behaviour. 1513 * Note also: 1514 * it looks like the space that needs to be kept free is coupled 1515 * with the size of the DMA buffer. */ 1516 if (si->engine.dma.free < 256) 1517 si->engine.dma.free = 0; 1518 else 1519 si->engine.dma.free -= 256; 1520 } 1521 } 1522 else 1523 { 1524 /* engine is fetching 'in front of us', so the first piece of the buffer 1525 * is free */ 1526 1527 /* note the updated current free space we have in the DMA buffer */ 1528 si->engine.dma.free = dmaget - si->engine.dma.current; 1529 /* mind this pittfall: 1530 * Leave some room between where the engine is fetching and where we 1531 * put new commands. Otherwise the engine will crash on heavy loads. 1532 * A crash can be forced best in 640x480x32 mode with BeRoMeter 1.2.6. 1533 * (confirmed on NV11 and NV43 with less than 256 words forced freespace.) 1534 * Note: 1535 * The engine is DMA triggered for fetching chunks every 128 bytes, 1536 * maybe this is the reason for this behaviour. 1537 * Note also: 1538 * it looks like the space that needs to be kept free is coupled 1539 * with the size of the DMA buffer. */ 1540 if (si->engine.dma.free < 256) 1541 si->engine.dma.free = 0; 1542 else 1543 si->engine.dma.free -= 256; 1544 } 1545 } 1546 1547 /* log timeout if we had one */ 1548 if (cnt == 10000) 1549 { 1550 if (err < 3) err++; 1551 LOG(4,("ACC_DMA: fifofree; DMA timeout #%d, engine trouble!\n", err)); 1552 } 1553 1554 /* we must make the acceleration routines abort or the driver will hang! */ 1555 if (err >= 3) return B_ERROR; 1556 1557 return B_OK; 1558 } 1559 1560 static void nv_acc_cmd_dma(uint32 cmd, uint16 offset, uint16 size) 1561 { 1562 /* NV_FIFO_DMA_OPCODE: set number of cmd words (b18 - 28); set FIFO offset for 1563 * first cmd word (b2 - 15); set DMA opcode = method (b29 - 31). 1564 * a 'NOP' is the opcode word $00000000. */ 1565 /* note: 1566 * possible DMA opcodes: 1567 * b'000' is 'method' (execute cmd); 1568 * b'001' is 'jump'; 1569 * b'002' is 'noninc method' (execute buffer wrap-around); 1570 * b'003' is 'call': return is executed by opcode word $00020000 (b17 = 1). */ 1571 /* note also: 1572 * this system uses auto-increments for the FIFO offset adresses. Make sure 1573 * to set a new adress if a gap exists between the previous one and the new one. */ 1574 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = ((size << 18) | 1575 ((si->engine.fifo.ch_ptr[cmd] + offset) & 0x0000fffc)); 1576 1577 /* space left after issuing the current command is the cmd AND it's arguments less */ 1578 si->engine.dma.free -= (size + 1); 1579 } 1580 1581 static void nv_acc_set_ch_dma(uint16 ch, uint32 handle) 1582 { 1583 /* issue FIFO channel assign cmd */ 1584 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = ((1 << 18) | ch); 1585 /* set new assignment */ 1586 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = (0x80000000 | handle); 1587 1588 /* space left after issuing the current command is the cmd AND it's arguments less */ 1589 si->engine.dma.free -= 2; 1590 } 1591 1592 /* note: 1593 * switching fifo channel assignments this way has no noticable slowdown: 1594 * measured 0.2% with Quake2. */ 1595 void nv_acc_assert_fifo_dma(void) 1596 { 1597 /* does every engine cmd this accelerant needs have a FIFO channel? */ 1598 //fixme: can probably be optimized for both speed and channel selection... 1599 if (!si->engine.fifo.ch_ptr[NV_ROP5_SOLID] || 1600 !si->engine.fifo.ch_ptr[NV_IMAGE_BLACK_RECTANGLE] || 1601 !si->engine.fifo.ch_ptr[NV_IMAGE_PATTERN] || 1602 !si->engine.fifo.ch_ptr[NV4_SURFACE] || 1603 !si->engine.fifo.ch_ptr[NV_IMAGE_BLIT] || 1604 !si->engine.fifo.ch_ptr[NV4_GDI_RECTANGLE_TEXT] || 1605 !si->engine.fifo.ch_ptr[NV_SCALED_IMAGE_FROM_MEMORY]) 1606 { 1607 uint16 cnt; 1608 1609 /* free the FIFO channels we want from the currently assigned cmd's */ 1610 si->engine.fifo.ch_ptr[si->engine.fifo.handle[0]] = 0; 1611 si->engine.fifo.ch_ptr[si->engine.fifo.handle[1]] = 0; 1612 si->engine.fifo.ch_ptr[si->engine.fifo.handle[2]] = 0; 1613 si->engine.fifo.ch_ptr[si->engine.fifo.handle[3]] = 0; 1614 si->engine.fifo.ch_ptr[si->engine.fifo.handle[4]] = 0; 1615 si->engine.fifo.ch_ptr[si->engine.fifo.handle[5]] = 0; 1616 si->engine.fifo.ch_ptr[si->engine.fifo.handle[6]] = 0; 1617 1618 /* set new object handles */ 1619 si->engine.fifo.handle[0] = NV_ROP5_SOLID; 1620 si->engine.fifo.handle[1] = NV_IMAGE_BLACK_RECTANGLE; 1621 si->engine.fifo.handle[2] = NV_IMAGE_PATTERN; 1622 si->engine.fifo.handle[3] = NV4_SURFACE; 1623 si->engine.fifo.handle[4] = NV_IMAGE_BLIT; 1624 si->engine.fifo.handle[5] = NV4_GDI_RECTANGLE_TEXT; 1625 si->engine.fifo.handle[6] = NV_SCALED_IMAGE_FROM_MEMORY; 1626 1627 /* set handle's pointers to their assigned FIFO channels */ 1628 /* note: 1629 * b0-1 aren't used as adressbits. Using b0 to indicate a valid pointer. */ 1630 for (cnt = 0; cnt < 0x08; cnt++) 1631 { 1632 si->engine.fifo.ch_ptr[(si->engine.fifo.handle[cnt])] = 1633 (0x00000001 + (cnt * 0x00002000)); 1634 } 1635 1636 /* wait for room in fifo for new FIFO assigment cmds if needed. */ 1637 if (nv_acc_fifofree_dma(14) != B_OK) return; 1638 1639 /* program new FIFO assignments */ 1640 /* Raster OPeration: */ 1641 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH0, si->engine.fifo.handle[0]); 1642 /* Clip: */ 1643 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH1, si->engine.fifo.handle[1]); 1644 /* Pattern: */ 1645 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH2, si->engine.fifo.handle[2]); 1646 /* 2D Surface: */ 1647 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH3, si->engine.fifo.handle[3]); 1648 /* Blit: */ 1649 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH4, si->engine.fifo.handle[4]); 1650 /* Bitmap: */ 1651 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH5, si->engine.fifo.handle[5]); 1652 /* Scaled and fitered Blit: */ 1653 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH6, si->engine.fifo.handle[6]); 1654 1655 /* tell the engine to fetch and execute all (new) commands in the DMA buffer */ 1656 nv_start_dma(); 1657 } 1658 } 1659 1660 /* 1661 note: 1662 moved acceleration 'top-level' routines to be integrated in the engine: 1663 it is costly to call the engine for every single function within a loop! 1664 (measured with BeRoMeter 1.2.6: upto 15% speed increase on all CPU's.) 1665 1666 note also: 1667 splitting up each command list into sublists (see routines below) prevents 1668 a lot more nested calls, further increasing the speed with upto 70%. 1669 1670 finally: 1671 sending the sublist to just one single engine command even further increases 1672 speed with upto another 10%. This can't be done for blits though, as this engine- 1673 command's hardware does not support multiple objects. 1674 */ 1675 1676 /* screen to screen blit - i.e. move windows around and scroll within them. */ 1677 void SCREEN_TO_SCREEN_BLIT_DMA(engine_token *et, blit_params *list, uint32 count) 1678 { 1679 uint32 i = 0; 1680 uint16 subcnt; 1681 1682 /*** init acc engine for blit function ***/ 1683 /* ROP registers (Raster OPeration): 1684 * wait for room in fifo for ROP cmd if needed. */ 1685 if (nv_acc_fifofree_dma(2) != B_OK) return; 1686 /* now setup ROP (writing 2 32bit words) for GXcopy */ 1687 nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); 1688 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xcc; /* SetRop5 */ 1689 1690 /*** do each blit ***/ 1691 /* Note: 1692 * blit-copy direction is determined inside nvidia hardware: no setup needed */ 1693 while (count) 1694 { 1695 /* break up the list in sublists to minimize calls, while making sure long 1696 * lists still get executed without trouble */ 1697 subcnt = 32; 1698 if (count < 32) subcnt = count; 1699 count -= subcnt; 1700 1701 /* wait for room in fifo for blit cmd if needed. */ 1702 if (nv_acc_fifofree_dma(4 * subcnt) != B_OK) return; 1703 1704 while (subcnt--) 1705 { 1706 /* now setup blit (writing 4 32bit words) */ 1707 nv_acc_cmd_dma(NV_IMAGE_BLIT, NV_IMAGE_BLIT_SOURCEORG, 3); 1708 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1709 (((list[i].src_top) << 16) | (list[i].src_left)); /* SourceOrg */ 1710 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1711 (((list[i].dest_top) << 16) | (list[i].dest_left)); /* DestOrg */ 1712 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1713 ((((list[i].height) + 1) << 16) | ((list[i].width) + 1)); /* HeightWidth */ 1714 1715 i++; 1716 } 1717 1718 /* tell the engine to fetch the commands in the DMA buffer that where not 1719 * executed before. */ 1720 nv_start_dma(); 1721 } 1722 1723 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 1724 si->engine.threeD.reload = 0xffffffff; 1725 } 1726 1727 /* scaled and filtered screen to screen blit - i.e. video playback without overlay */ 1728 /* note: source and destination may not overlap. */ 1729 //fixme? checkout NV5 and NV10 version of cmd: faster?? (or is 0x77 a 'autoselect' version?) 1730 void SCREEN_TO_SCREEN_SCALED_FILTERED_BLIT_DMA(engine_token *et, scaled_blit_params *list, uint32 count) 1731 { 1732 uint32 i = 0; 1733 uint16 subcnt; 1734 uint32 cmd_depth; 1735 uint8 bpp; 1736 1737 /*** init acc engine for scaled filtered blit function ***/ 1738 /* Set pixel width */ 1739 switch(si->dm.space) 1740 { 1741 case B_RGB15_LITTLE: 1742 cmd_depth = 0x00000002; 1743 bpp = 2; 1744 break; 1745 case B_RGB16_LITTLE: 1746 cmd_depth = 0x00000007; 1747 bpp = 2; 1748 break; 1749 case B_RGB32_LITTLE: 1750 case B_RGBA32_LITTLE: 1751 cmd_depth = 0x00000004; 1752 bpp = 4; 1753 break; 1754 /* fixme sometime: 1755 * we could do the spaces below if this function would be modified to be able 1756 * to use a source outside of the desktop, i.e. using offscreen bitmaps... */ 1757 case B_YCbCr422: 1758 cmd_depth = 0x00000005; 1759 bpp = 2; 1760 break; 1761 case B_YUV422: 1762 cmd_depth = 0x00000006; 1763 bpp = 2; 1764 break; 1765 default: 1766 /* note: this function does not support src or dest in the B_CMAP8 space! */ 1767 //fixme: the NV10 version of this cmd supports B_CMAP8 src though... (checkout) 1768 LOG(8,("ACC_DMA: scaled_filtered_blit, invalid bit depth\n")); 1769 return; 1770 } 1771 1772 /* modify surface depth settings for 15-bit colorspace so command works as intended */ 1773 if (si->dm.space == B_RGB15_LITTLE) 1774 { 1775 /* wait for room in fifo for surface setup cmd if needed */ 1776 if (nv_acc_fifofree_dma(2) != B_OK) return; 1777 /* now setup 2D surface (writing 1 32bit word) */ 1778 nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 1); 1779 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000002; /* Format */ 1780 } 1781 1782 /* TNT1 has fixed operation mode 'SRCcopy' while the rest can be programmed: */ 1783 if (si->ps.card_type != NV04) 1784 { 1785 /* wait for room in fifo for cmds if needed. */ 1786 if (nv_acc_fifofree_dma(5) != B_OK) return; 1787 /* now setup source bitmap colorspace */ 1788 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SETCOLORFORMAT, 2); 1789 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 1790 /* now setup operation mode to SRCcopy */ 1791 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000003; /* SetOperation */ 1792 } 1793 else 1794 { 1795 /* wait for room in fifo for cmd if needed. */ 1796 if (nv_acc_fifofree_dma(4) != B_OK) return; 1797 /* now setup source bitmap colorspace */ 1798 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SETCOLORFORMAT, 1); 1799 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 1800 /* TNT1 has fixed operation mode SRCcopy */ 1801 } 1802 /* now setup fill color (writing 2 32bit words) */ 1803 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); 1804 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000000; /* Color1A */ 1805 1806 /*** do each blit ***/ 1807 while (count) 1808 { 1809 /* break up the list in sublists to minimize calls, while making sure long 1810 * lists still get executed without trouble */ 1811 subcnt = 16; 1812 if (count < 16) subcnt = count; 1813 count -= subcnt; 1814 1815 /* wait for room in fifo for blit cmd if needed. */ 1816 if (nv_acc_fifofree_dma(12 * subcnt) != B_OK) return; 1817 1818 while (subcnt--) 1819 { 1820 /* now setup blit (writing 12 32bit words) */ 1821 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCEORG, 6); 1822 /* setup dest clipping ref for blit (not used) (b0-15 = left, b16-31 = top) */ 1823 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0; /* SourceOrg */ 1824 /* setup dest clipping size for blit */ 1825 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1826 (((list[i].dest_height + 1) << 16) | (list[i].dest_width + 1)); /* SourceHeightWidth */ 1827 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1828 /* setup destination location and size for blit */ 1829 (((list[i].dest_top) << 16) | (list[i].dest_left)); /* DestOrg */ 1830 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1831 (((list[i].dest_height + 1) << 16) | (list[i].dest_width + 1)); /* DestHeightWidth */ 1832 //fixme: findout scaling limits... (although the current cmd interface doesn't support them.) 1833 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1834 (((list[i].src_width + 1) << 20) / (list[i].dest_width + 1)); /* HorInvScale (in 12.20 format) */ 1835 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1836 (((list[i].src_height + 1) << 20) / (list[i].dest_height + 1)); /* VerInvScale (in 12.20 format) */ 1837 1838 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCESIZE, 4); 1839 /* setup horizontal and vertical source (fetching) ends. 1840 * note: 1841 * horizontal granularity is 2 pixels, vertical granularity is 1 pixel. 1842 * look at Matrox or Neomagic bes engines code for usage example. */ 1843 //fixme: tested 15, 16 and 32-bit RGB depth, verify other depths... 1844 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1845 (((list[i].src_height + 1) << 16) | 1846 (((list[i].src_width + 1) + 0x0001) & ~0x0001)); /* SourceHeightWidth */ 1847 /* setup source pitch (b0-15). Set 'format origin center' (b16-17) and 1848 * select 'format interpolator foh (bilinear filtering)' (b24). */ 1849 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1850 (si->fbc.bytes_per_row | (1 << 16) | (1 << 24)); /* SourcePitch */ 1851 /* setup source surface location */ 1852 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1853 ((uint32)((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)) + 1854 (list[i].src_top * si->fbc.bytes_per_row) + (list[i].src_left * bpp); /* Offset */ 1855 /* setup source start: first (sub)pixel contributing to output picture */ 1856 /* note: 1857 * clipping is not asked for. 1858 * look at nVidia NV10+ bes engine code for useage example. */ 1859 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1860 0; /* SourceRef (b0-15 = hor, b16-31 = ver: both in 12.4 format) */ 1861 1862 i++; 1863 } 1864 1865 /* tell the engine to fetch the commands in the DMA buffer that where not 1866 * executed before. */ 1867 nv_start_dma(); 1868 } 1869 1870 /* reset surface depth settings so the other engine commands works as intended */ 1871 if (si->dm.space == B_RGB15_LITTLE) 1872 { 1873 /* wait for room in fifo for surface setup cmd if needed */ 1874 if (nv_acc_fifofree_dma(2) != B_OK) return; 1875 /* now setup 2D surface (writing 1 32bit word) */ 1876 nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 1); 1877 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000004; /* Format */ 1878 1879 /* tell the engine to fetch the commands in the DMA buffer that where not 1880 * executed before. */ 1881 nv_start_dma(); 1882 } 1883 1884 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 1885 si->engine.threeD.reload = 0xffffffff; 1886 } 1887 1888 1889 /* scaled and filtered screen to screen blit - i.e. video playback without overlay */ 1890 /* note: source and destination may not overlap. */ 1891 // FIXME? checkout NV5 and NV10 version of cmd: faster?? (or is 0x77 a 'autoselect' version?) 1892 void OFFSCREEN_TO_SCREEN_SCALED_FILTERED_BLIT_DMA( 1893 engine_token *et, offscreen_buffer_config *config, clipped_scaled_blit_params *list, uint32 count) 1894 { 1895 uint32 i = 0; 1896 uint32 cmd_depth; 1897 uint8 bpp; 1898 1899 LOG(4, ("ACC_DMA: offscreen src buffer location $%p\n", 1900 (uint8*)(config->buffer))); 1901 1902 /*** init acc engine for scaled filtered blit function ***/ 1903 /* Set pixel width */ 1904 switch (config->space) 1905 { 1906 case B_RGB15_LITTLE: 1907 cmd_depth = 0x00000002; 1908 bpp = 2; 1909 break; 1910 case B_RGB16_LITTLE: 1911 cmd_depth = 0x00000007; 1912 bpp = 2; 1913 break; 1914 case B_RGB32_LITTLE: 1915 case B_RGBA32_LITTLE: 1916 cmd_depth = 0x00000004; 1917 bpp = 4; 1918 break; 1919 /* fixme sometime: 1920 * we could do the spaces below if this function would be modified to be able 1921 * to use a source outside of the desktop, i.e. using offscreen bitmaps... */ 1922 case B_YCbCr422: 1923 cmd_depth = 0x00000005; 1924 bpp = 2; 1925 break; 1926 case B_YUV422: 1927 cmd_depth = 0x00000006; 1928 bpp = 2; 1929 break; 1930 default: 1931 /* note: this function does not support src or dest in the B_CMAP8 space! */ 1932 //fixme: the NV10 version of this cmd supports B_CMAP8 src though... (checkout) 1933 LOG(8,("ACC_DMA: scaled_filtered_blit, invalid bit depth\n")); 1934 return; 1935 } 1936 1937 /* modify surface depth settings for 15-bit colorspace so command works as intended */ 1938 if (si->dm.space == B_RGB15_LITTLE) 1939 { 1940 /* wait for room in fifo for surface setup cmd if needed */ 1941 if (nv_acc_fifofree_dma(2) != B_OK) return; 1942 /* now setup 2D surface (writing 1 32bit word) */ 1943 nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 1); 1944 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000002; /* Format */ 1945 } 1946 1947 /* TNT1 has fixed operation mode 'SRCcopy' while the rest can be programmed: */ 1948 if (si->ps.card_type != NV04) 1949 { 1950 /* wait for room in fifo for cmds if needed. */ 1951 if (nv_acc_fifofree_dma(5) != B_OK) return; 1952 /* now setup source bitmap colorspace */ 1953 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SETCOLORFORMAT, 2); 1954 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 1955 /* now setup operation mode to SRCcopy */ 1956 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000003; /* SetOperation */ 1957 } 1958 else 1959 { 1960 /* wait for room in fifo for cmd if needed. */ 1961 if (nv_acc_fifofree_dma(4) != B_OK) return; 1962 /* now setup source bitmap colorspace */ 1963 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SETCOLORFORMAT, 1); 1964 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 1965 /* TNT1 has fixed operation mode SRCcopy */ 1966 } 1967 /* now setup fill color (writing 2 32bit words) */ 1968 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); 1969 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000000; /* Color1A */ 1970 1971 /*** do each blit ***/ 1972 while (count--) 1973 { 1974 uint32 j = 0; 1975 uint16 clipcnt = list[i].dest_clipcount; 1976 1977 LOG(4,("ACC_DMA: offscreen src left %d, top %d\n", list[i].src_left, list[i].src_top)); 1978 LOG(4,("ACC_DMA: offscreen src width %d, height %d\n", list[i].src_width + 1, list[i].src_height + 1)); 1979 LOG(4,("ACC_DMA: offscreen dest left %d, top %d\n", list[i].dest_left, list[i].dest_top)); 1980 LOG(4,("ACC_DMA: offscreen dest width %d, height %d\n", list[i].dest_width + 1, list[i].dest_height + 1)); 1981 1982 /* wait for room in fifo for blit cmd if needed. */ 1983 if (nv_acc_fifofree_dma(9 + (5 * clipcnt)) != B_OK) return; 1984 1985 /* now setup blit (writing 12 32bit words) */ 1986 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCEORG + 8, 4); 1987 /* setup destination location and size for blit */ 1988 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1989 ((list[i].dest_top << 16) | list[i].dest_left); /* DestTopLeftOutputRect */ 1990 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1991 (((list[i].dest_height + 1) << 16) | (list[i].dest_width + 1)); /* DestHeightWidthOutputRect */ 1992 /* setup scaling */ 1993 //fixme: findout scaling limits... (although the current cmd interface doesn't support them.) 1994 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1995 (((list[i].src_width + 1) << 20) / (list[i].dest_width + 1)); /* HorInvScale (in 12.20 format) */ 1996 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1997 (((list[i].src_height + 1) << 20) / (list[i].dest_height + 1)); /* VerInvScale (in 12.20 format) */ 1998 1999 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCESIZE, 3); 2000 /* setup horizontal and vertical source (fetching) ends. 2001 * note: 2002 * horizontal granularity is 2 pixels, vertical granularity is 1 pixel. 2003 * look at Matrox or Neomagic bes engines code for usage example. */ 2004 //fixme: tested 15, 16 and 32-bit RGB depth, verify other depths... 2005 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2006 (((list[i].src_height + 1) << 16) | 2007 (((list[i].src_width + 1) + 0x0001) & ~0x0001)); /* SourceHeightWidth */ 2008 /* setup source pitch (b0-15). Set 'format origin center' (b16-17) and 2009 * select 'format interpolator foh (bilinear filtering)' (b24). */ 2010 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2011 (config->bytes_per_row | (1 << 16) | (1 << 24)); /* SourcePitch */ 2012 2013 /* setup source surface location */ 2014 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2015 (uint32)((uint8*)config->buffer - (uint8*)si->framebuffer + 2016 (list[i].src_top * config->bytes_per_row) + (list[i].src_left * bpp)); /* Offset */ 2017 2018 while (clipcnt--) 2019 { 2020 LOG(4,("ACC_DMA: offscreen clip left %d, top %d\n", 2021 list[i].dest_cliplist[j].left, list[i].dest_cliplist[j].top)); 2022 LOG(4,("ACC_DMA: offscreen clip width %d, height %d\n", 2023 list[i].dest_cliplist[j].width + 1, list[i].dest_cliplist[j].height + 1)); 2024 2025 /* now setup blit (writing 12 32bit words) */ 2026 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCEORG, 2); 2027 /* setup dest clipping rect for blit (b0-15 = left, b16-31 = top) */ 2028 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2029 (list[i].dest_cliplist[j].top << 16) | list[i].dest_cliplist[j].left; /* DestTopLeftClipRect */ 2030 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2031 ((list[i].dest_cliplist[j].height + 1) << 16) | (list[i].dest_cliplist[j].width + 1); /* DestHeightWidthClipRect */ 2032 2033 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCESIZE + 12, 1); 2034 /* setup source start: first (sub)pixel contributing to output picture */ 2035 /* note: 2036 * clipping is not asked for. 2037 * look at nVidia NV10+ bes engine code for useage example. */ 2038 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2039 0; /* SourceRef (b0-15 = hor, b16-31 = ver: both in 12.4 format) */ 2040 2041 j++; 2042 } 2043 2044 i++; 2045 } 2046 2047 /* tell the engine to fetch the commands in the DMA buffer that where not 2048 * executed before. */ 2049 nv_start_dma(); 2050 2051 /* reset surface depth settings so the other engine commands works as intended */ 2052 if (si->dm.space == B_RGB15_LITTLE) 2053 { 2054 /* wait for room in fifo for surface setup cmd if needed */ 2055 if (nv_acc_fifofree_dma(2) != B_OK) return; 2056 /* now setup 2D surface (writing 1 32bit word) */ 2057 nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 1); 2058 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000004; /* Format */ 2059 2060 /* tell the engine to fetch the commands in the DMA buffer that where not 2061 * executed before. */ 2062 nv_start_dma(); 2063 } 2064 2065 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 2066 si->engine.threeD.reload = 0xffffffff; 2067 } 2068 2069 /* rectangle fill - i.e. workspace and window background color */ 2070 void FILL_RECTANGLE_DMA(engine_token *et, uint32 colorIndex, fill_rect_params *list, uint32 count) 2071 { 2072 uint32 i = 0; 2073 uint16 subcnt; 2074 2075 /*** init acc engine for fill function ***/ 2076 /* ROP registers (Raster OPeration): 2077 * wait for room in fifo for ROP and bitmap cmd if needed. */ 2078 if (nv_acc_fifofree_dma(4) != B_OK) return; 2079 /* now setup ROP (writing 2 32bit words) for GXcopy */ 2080 nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); 2081 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xcc; /* SetRop5 */ 2082 /* now setup fill color (writing 2 32bit words) */ 2083 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); 2084 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = colorIndex; /* Color1A */ 2085 2086 /*** draw each rectangle ***/ 2087 while (count) 2088 { 2089 /* break up the list in sublists to minimize calls, while making sure long 2090 * lists still get executed without trouble */ 2091 subcnt = 32; 2092 if (count < 32) subcnt = count; 2093 count -= subcnt; 2094 2095 /* wait for room in fifo for bitmap cmd if needed. */ 2096 if (nv_acc_fifofree_dma(1 + (2 * subcnt)) != B_OK) return; 2097 2098 /* issue fill command once... */ 2099 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, (2 * subcnt)); 2100 /* ... and send multiple rects (engine cmd supports 32 max) */ 2101 while (subcnt--) 2102 { 2103 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2104 (((list[i].left) << 16) | ((list[i].top) & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */ 2105 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2106 (((((list[i].right)+1) - (list[i].left)) << 16) | 2107 (((list[i].bottom-list[i].top)+1) & 0x0000ffff)); /* Unclipped Rect 0 WidthHeight */ 2108 2109 i++; 2110 } 2111 2112 /* tell the engine to fetch the commands in the DMA buffer that where not 2113 * executed before. */ 2114 nv_start_dma(); 2115 } 2116 2117 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 2118 si->engine.threeD.reload = 0xffffffff; 2119 } 2120 2121 /* span fill - i.e. (selected) menuitem background color (Dano) */ 2122 void FILL_SPAN_DMA(engine_token *et, uint32 colorIndex, uint16 *list, uint32 count) 2123 { 2124 uint32 i = 0; 2125 uint16 subcnt; 2126 2127 /*** init acc engine for fill function ***/ 2128 /* ROP registers (Raster OPeration): 2129 * wait for room in fifo for ROP and bitmap cmd if needed. */ 2130 if (nv_acc_fifofree_dma(4) != B_OK) return; 2131 /* now setup ROP (writing 2 32bit words) for GXcopy */ 2132 nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); 2133 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xcc; /* SetRop5 */ 2134 /* now setup fill color (writing 2 32bit words) */ 2135 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); 2136 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = colorIndex; /* Color1A */ 2137 2138 /*** draw each span ***/ 2139 while (count) 2140 { 2141 /* break up the list in sublists to minimize calls, while making sure long 2142 * lists still get executed without trouble */ 2143 subcnt = 32; 2144 if (count < 32) subcnt = count; 2145 count -= subcnt; 2146 2147 /* wait for room in fifo for bitmap cmd if needed. */ 2148 if (nv_acc_fifofree_dma(1 + (2 * subcnt)) != B_OK) return; 2149 2150 /* issue fill command once... */ 2151 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, (2 * subcnt)); 2152 /* ... and send multiple rects (spans) (engine cmd supports 32 max) */ 2153 while (subcnt--) 2154 { 2155 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2156 (((list[i+1]) << 16) | ((list[i]) & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */ 2157 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2158 ((((list[i+2]+1) - (list[i+1])) << 16) | 0x00000001); /* Unclipped Rect 0 WidthHeight */ 2159 2160 i+=3; 2161 } 2162 2163 /* tell the engine to fetch the commands in the DMA buffer that where not 2164 * executed before. */ 2165 nv_start_dma(); 2166 } 2167 2168 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 2169 si->engine.threeD.reload = 0xffffffff; 2170 } 2171 2172 /* rectangle invert - i.e. text cursor and text selection */ 2173 void INVERT_RECTANGLE_DMA(engine_token *et, fill_rect_params *list, uint32 count) 2174 { 2175 uint32 i = 0; 2176 uint16 subcnt; 2177 2178 /*** init acc engine for invert function ***/ 2179 /* ROP registers (Raster OPeration): 2180 * wait for room in fifo for ROP and bitmap cmd if needed. */ 2181 if (nv_acc_fifofree_dma(4) != B_OK) return; 2182 /* now setup ROP (writing 2 32bit words) for GXinvert */ 2183 nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); 2184 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x55; /* SetRop5 */ 2185 /* now reset fill color (writing 2 32bit words) */ 2186 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); 2187 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000000; /* Color1A */ 2188 2189 /*** invert each rectangle ***/ 2190 while (count) 2191 { 2192 /* break up the list in sublists to minimize calls, while making sure long 2193 * lists still get executed without trouble */ 2194 subcnt = 32; 2195 if (count < 32) subcnt = count; 2196 count -= subcnt; 2197 2198 /* wait for room in fifo for bitmap cmd if needed. */ 2199 if (nv_acc_fifofree_dma(1 + (2 * subcnt)) != B_OK) return; 2200 2201 /* issue fill command once... */ 2202 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, (2 * subcnt)); 2203 /* ... and send multiple rects (engine cmd supports 32 max) */ 2204 while (subcnt--) 2205 { 2206 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2207 (((list[i].left) << 16) | ((list[i].top) & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */ 2208 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2209 (((((list[i].right)+1) - (list[i].left)) << 16) | 2210 (((list[i].bottom-list[i].top)+1) & 0x0000ffff)); /* Unclipped Rect 0 WidthHeight */ 2211 2212 i++; 2213 } 2214 2215 /* tell the engine to fetch the commands in the DMA buffer that where not 2216 * executed before. */ 2217 nv_start_dma(); 2218 } 2219 2220 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 2221 si->engine.threeD.reload = 0xffffffff; 2222 } 2223