1 /* NV Acceleration functions */ 2 3 /* Author: 4 Rudolf Cornelissen 8/2003-9/2007. 5 6 This code was possible thanks to: 7 - the Linux XFree86 NV driver, 8 - the Linux UtahGLX 3D driver. 9 */ 10 11 #define MODULE_BIT 0x00080000 12 13 #include "nv_std.h" 14 15 /*acceleration notes*/ 16 17 /*functions Be's app_server uses: 18 fill span (horizontal only) 19 fill rectangle (these 2 are very similar) 20 invert rectangle 21 blit 22 */ 23 24 static void nv_init_for_3D_dma(void); 25 static void nv_start_dma(void); 26 static status_t nv_acc_fifofree_dma(uint16 cmd_size); 27 static void nv_acc_cmd_dma(uint32 cmd, uint16 offset, uint16 size); 28 static void nv_acc_set_ch_dma(uint16 ch, uint32 handle); 29 30 /* used to track engine DMA stalls */ 31 static uint8 err; 32 33 /* wait until engine completely idle */ 34 status_t nv_acc_wait_idle_dma() 35 { 36 /* we'd better check for timeouts on the DMA engine as it's theoretically 37 * breakable by malfunctioning software */ 38 uint16 cnt = 0; 39 40 /* wait until all upcoming commands are in execution at least. Do this until 41 * we hit a timeout; abort if we failed at least three times before: 42 * if DMA stalls, we have to forget about it alltogether at some point, or 43 * the system will almost come to a complete halt.. */ 44 /* note: 45 * it doesn't matter which FIFO channel's DMA registers we access, they are in 46 * fact all the same set. It also doesn't matter if the channel was assigned a 47 * command or not. 
*/ 48 while ((NV_REG32(NVACC_FIFO + NV_GENERAL_DMAGET) != (si->engine.dma.put << 2)) && 49 (cnt < 10000) && (err < 3)) 50 { 51 /* snooze a bit so I do not hammer the bus */ 52 snooze (100); 53 cnt++; 54 } 55 56 /* log timeout if we had one */ 57 if (cnt == 10000) 58 { 59 if (err < 3) err++; 60 LOG(4,("ACC_DMA: wait_idle; DMA timeout #%d, engine trouble!\n", err)); 61 } 62 63 /* wait until execution completed */ 64 while (ACCR(STATUS)) 65 { 66 /* snooze a bit so I do not hammer the bus */ 67 snooze (100); 68 } 69 70 return B_OK; 71 } 72 73 /* AFAIK this must be done for every new screenmode. 74 * Engine required init. */ 75 status_t nv_acc_init_dma() 76 { 77 uint32 cnt, tmp; 78 uint32 surf_depth, cmd_depth; 79 /* reset the engine DMA stalls counter */ 80 err = 0; 81 82 /* a hanging engine only recovers from a complete power-down/power-up cycle */ 83 NV_REG32(NV32_PWRUPCTRL) = 0x13110011; 84 snooze(1000); 85 NV_REG32(NV32_PWRUPCTRL) = 0x13111111; 86 87 /* don't try this on NV20 and later.. */ 88 /* note: 89 * the specific register that's responsible for the speedfix on NV18 is 90 * $00400ed8: bit 6 needs to be zero for fastest rendering (confirmed). */ 91 /* note also: 92 * on NV28 the following ranges could be reset (confirmed): 93 * $00400000 upto/incl. $004002fc; 94 * $00400400 upto/incl. $004017fc; 95 * $0040180c upto/incl. $00401948; 96 * $00401994 upto/incl. $00401a80; 97 * $00401a94 upto/incl. $00401ffc. 98 * The intermediate ranges hang the engine upon resetting. */ 99 if (si->ps.card_arch < NV20A) 100 { 101 /* actively reset the PGRAPH registerset (acceleration engine) */ 102 for (cnt = 0x00400000; cnt < 0x00402000; cnt +=4) 103 { 104 NV_REG32(cnt) = 0x00000000; 105 } 106 } 107 108 /* setup PTIMER: */ 109 //fixme? how about NV28 setup as just after coldstarting? 
(see nv_info.c) 110 /* set timer numerator to 8 (in b0-15) */ 111 ACCW(PT_NUMERATOR, 0x00000008); 112 /* set timer denominator to 3 (in b0-15) */ 113 ACCW(PT_DENOMINATR, 0x00000003); 114 115 /* disable timer-alarm INT requests (b0) */ 116 ACCW(PT_INTEN, 0x00000000); 117 /* reset timer-alarm INT status bit (b0) */ 118 ACCW(PT_INTSTAT, 0xffffffff); 119 120 /* enable PRAMIN write access on pre NV10 before programming it! */ 121 if (si->ps.card_arch == NV04A) 122 { 123 /* set framebuffer config: type = notiling, PRAMIN write access enabled */ 124 NV_REG32(NV32_PFB_CONFIG_0) = 0x00001114; 125 } 126 else 127 { 128 /* setup acc engine 'source' tile adressranges */ 129 if ((si->ps.card_type <= NV40) || (si->ps.card_type == NV45)) 130 { 131 ACCW(NV10_FBTIL0AD, 0); 132 ACCW(NV10_FBTIL1AD, 0); 133 ACCW(NV10_FBTIL2AD, 0); 134 ACCW(NV10_FBTIL3AD, 0); 135 ACCW(NV10_FBTIL4AD, 0); 136 ACCW(NV10_FBTIL5AD, 0); 137 ACCW(NV10_FBTIL6AD, 0); 138 ACCW(NV10_FBTIL7AD, 0); 139 ACCW(NV10_FBTIL0ED, (si->ps.memory_size - 1)); 140 ACCW(NV10_FBTIL1ED, (si->ps.memory_size - 1)); 141 ACCW(NV10_FBTIL2ED, (si->ps.memory_size - 1)); 142 ACCW(NV10_FBTIL3ED, (si->ps.memory_size - 1)); 143 ACCW(NV10_FBTIL4ED, (si->ps.memory_size - 1)); 144 ACCW(NV10_FBTIL5ED, (si->ps.memory_size - 1)); 145 ACCW(NV10_FBTIL6ED, (si->ps.memory_size - 1)); 146 ACCW(NV10_FBTIL7ED, (si->ps.memory_size - 1)); 147 } 148 else 149 { 150 /* NV41, 43, 44, G70 and up */ 151 ACCW(NV41_FBTIL0AD, 0); 152 ACCW(NV41_FBTIL1AD, 0); 153 ACCW(NV41_FBTIL2AD, 0); 154 ACCW(NV41_FBTIL3AD, 0); 155 ACCW(NV41_FBTIL4AD, 0); 156 ACCW(NV41_FBTIL5AD, 0); 157 ACCW(NV41_FBTIL6AD, 0); 158 ACCW(NV41_FBTIL7AD, 0); 159 ACCW(NV41_FBTIL8AD, 0); 160 ACCW(NV41_FBTIL9AD, 0); 161 ACCW(NV41_FBTILAAD, 0); 162 ACCW(NV41_FBTILBAD, 0); 163 ACCW(NV41_FBTIL0ED, (si->ps.memory_size - 1)); 164 ACCW(NV41_FBTIL1ED, (si->ps.memory_size - 1)); 165 ACCW(NV41_FBTIL2ED, (si->ps.memory_size - 1)); 166 ACCW(NV41_FBTIL3ED, (si->ps.memory_size - 1)); 167 ACCW(NV41_FBTIL4ED, 
(si->ps.memory_size - 1)); 168 ACCW(NV41_FBTIL5ED, (si->ps.memory_size - 1)); 169 ACCW(NV41_FBTIL6ED, (si->ps.memory_size - 1)); 170 ACCW(NV41_FBTIL7ED, (si->ps.memory_size - 1)); 171 ACCW(NV41_FBTIL8ED, (si->ps.memory_size - 1)); 172 ACCW(NV41_FBTIL9ED, (si->ps.memory_size - 1)); 173 ACCW(NV41_FBTILAED, (si->ps.memory_size - 1)); 174 ACCW(NV41_FBTILBED, (si->ps.memory_size - 1)); 175 176 if (si->ps.card_type >= G70) 177 { 178 ACCW(G70_FBTILCAD, 0); 179 ACCW(G70_FBTILDAD, 0); 180 ACCW(G70_FBTILEAD, 0); 181 ACCW(G70_FBTILCED, (si->ps.memory_size - 1)); 182 ACCW(G70_FBTILDED, (si->ps.memory_size - 1)); 183 ACCW(G70_FBTILEED, (si->ps.memory_size - 1)); 184 } 185 } 186 } 187 188 /*** PRAMIN ***/ 189 /* first clear the entire RAMHT (hash-table) space to a defined state. It turns 190 * out at least NV11 will keep the previously programmed handles over resets and 191 * power-outages upto about 15 seconds!! Faulty entries might well hang the 192 * engine (confirmed on NV11). 193 * Note: 194 * this behaviour is not very strange: even very old DRAM chips are known to be 195 * able to do this, even though you should refresh them every few milliseconds or 196 * so. (Large memory cell capacitors, though different cells vary a lot in their 197 * capacity.) 198 * Of course data validity is not certain by a long shot over this large 199 * amount of time.. */ 200 for(cnt = 0; cnt < 0x0400; cnt++) 201 NV_REG32(NVACC_HT_HANDL_00 + (cnt << 2)) = 0; 202 /* RAMHT (hash-table) space SETUP FIFO HANDLES */ 203 /* note: 204 * 'instance' tells you where the engine command is stored in 'PR_CTXx_x' sets 205 * below: instance being b4-19 with baseadress NV_PRAMIN_CTX_0 (0x00700000). 206 * That command is linked to the handle noted here. This handle is then used to 207 * tell the FIFO to which engine command it is connected! 208 * (CTX registers are actually a sort of RAM space.) 
*/ 209 if (si->ps.card_arch >= NV40A) 210 { 211 /* (first set) */ 212 ACCW(HT_HANDL_00, (0x80000000 | NV10_CONTEXT_SURFACES_2D)); /* 32bit handle (not used) */ 213 ACCW(HT_VALUE_00, 0x0010114c); /* instance $114c, engine = acc engine, CHID = $00 */ 214 215 ACCW(HT_HANDL_01, (0x80000000 | NV_IMAGE_BLIT)); /* 32bit handle */ 216 ACCW(HT_VALUE_01, 0x00101148); /* instance $1148, engine = acc engine, CHID = $00 */ 217 218 ACCW(HT_HANDL_02, (0x80000000 | NV4_GDI_RECTANGLE_TEXT)); /* 32bit handle */ 219 ACCW(HT_VALUE_02, 0x0010114a); /* instance $114a, engine = acc engine, CHID = $00 */ 220 221 /* (second set) */ 222 ACCW(HT_HANDL_10, (0x80000000 | NV_ROP5_SOLID)); /* 32bit handle */ 223 ACCW(HT_VALUE_10, 0x00101142); /* instance $1142, engine = acc engine, CHID = $00 */ 224 225 ACCW(HT_HANDL_11, (0x80000000 | NV_IMAGE_BLACK_RECTANGLE)); /* 32bit handle */ 226 ACCW(HT_VALUE_11, 0x00101144); /* instance $1144, engine = acc engine, CHID = $00 */ 227 228 ACCW(HT_HANDL_12, (0x80000000 | NV_IMAGE_PATTERN)); /* 32bit handle */ 229 ACCW(HT_VALUE_12, 0x00101146); /* instance $1146, engine = acc engine, CHID = $00 */ 230 231 ACCW(HT_HANDL_13, (0x80000000 | NV_SCALED_IMAGE_FROM_MEMORY)); /* 32bit handle */ 232 ACCW(HT_VALUE_13, 0x0010114e); /* instance $114e, engine = acc engine, CHID = $00 */ 233 } 234 else 235 { 236 /* (first set) */ 237 ACCW(HT_HANDL_00, (0x80000000 | NV4_SURFACE)); /* 32bit handle */ 238 ACCW(HT_VALUE_00, 0x80011145); /* instance $1145, engine = acc engine, CHID = $00 */ 239 240 ACCW(HT_HANDL_01, (0x80000000 | NV_IMAGE_BLIT)); /* 32bit handle */ 241 ACCW(HT_VALUE_01, 0x80011146); /* instance $1146, engine = acc engine, CHID = $00 */ 242 243 ACCW(HT_HANDL_02, (0x80000000 | NV4_GDI_RECTANGLE_TEXT)); /* 32bit handle */ 244 ACCW(HT_VALUE_02, 0x80011147); /* instance $1147, engine = acc engine, CHID = $00 */ 245 246 ACCW(HT_HANDL_03, (0x80000000 | NV4_CONTEXT_SURFACES_ARGB_ZS)); /* 32bit handle (3D) */ 247 ACCW(HT_VALUE_03, 0x80011148); /* instance $1148, engine = 
acc engine, CHID = $00 */ 248 249 /* NV4_ and NV10_DX5_TEXTURE_TRIANGLE should be identical */ 250 ACCW(HT_HANDL_04, (0x80000000 | NV4_DX5_TEXTURE_TRIANGLE)); /* 32bit handle (3D) */ 251 ACCW(HT_VALUE_04, 0x80011149); /* instance $1149, engine = acc engine, CHID = $00 */ 252 253 /* NV4_ and NV10_DX6_MULTI_TEXTURE_TRIANGLE should be identical */ 254 ACCW(HT_HANDL_05, (0x80000000 | NV4_DX6_MULTI_TEXTURE_TRIANGLE)); /* 32bit handle (not used) */ 255 ACCW(HT_VALUE_05, 0x8001114a); /* instance $114a, engine = acc engine, CHID = $00 */ 256 257 ACCW(HT_HANDL_06, (0x80000000 | NV1_RENDER_SOLID_LIN)); /* 32bit handle (not used) */ 258 ACCW(HT_VALUE_06, 0x8001114c); /* instance $114c, engine = acc engine, CHID = $00 */ 259 260 /* (second set) */ 261 ACCW(HT_HANDL_10, (0x80000000 | NV_ROP5_SOLID)); /* 32bit handle */ 262 ACCW(HT_VALUE_10, 0x80011142); /* instance $1142, engine = acc engine, CHID = $00 */ 263 264 ACCW(HT_HANDL_11, (0x80000000 | NV_IMAGE_BLACK_RECTANGLE)); /* 32bit handle */ 265 ACCW(HT_VALUE_11, 0x80011143); /* instance $1143, engine = acc engine, CHID = $00 */ 266 267 ACCW(HT_HANDL_12, (0x80000000 | NV_IMAGE_PATTERN)); /* 32bit handle */ 268 ACCW(HT_VALUE_12, 0x80011144); /* instance $1144, engine = acc engine, CHID = $00 */ 269 270 ACCW(HT_HANDL_13, (0x80000000 | NV_SCALED_IMAGE_FROM_MEMORY)); /* 32bit handle */ 271 ACCW(HT_VALUE_13, 0x8001114b); /* instance $114b, engine = acc engine, CHID = $00 */ 272 273 //2007 3D tests.. 274 if (si->ps.card_type == NV15) 275 { 276 ACCW(HT_HANDL_14, (0x80000000 | NV_TCL_PRIMITIVE_3D)); /* 32bit handle */ 277 ACCW(HT_VALUE_14, 0x8001114d); /* instance $114d, engine = acc engine, CHID = $00 */ 278 } 279 280 } 281 282 /* program CTX registers: CTX1 is mostly done later (colorspace dependant) */ 283 /* note: 284 * CTX determines which HT handles point to what engine commands. */ 285 /* note also: 286 * CTX registers are in fact in the same GPU internal RAM space as the engine's 287 * hashtable. 
This means that stuff programmed in here also survives resets and 288 * power-outages! (confirmed NV11) */ 289 if (si->ps.card_arch >= NV40A) 290 { 291 /* setup a DMA define for use by command defines below. */ 292 ACCW(PR_CTX0_R, 0x00003000); /* DMA page table present and of linear type; 293 * DMA target node is NVM (non-volatile memory?) 294 * (instead of doing PCI or AGP transfers) */ 295 ACCW(PR_CTX1_R, (si->ps.memory_size - 1)); /* DMA limit: size is all cardRAM */ 296 ACCW(PR_CTX2_R, ((0x00000000 & 0xfffff000) | 0x00000002)); 297 /* DMA access type is READ_AND_WRITE; 298 * memory starts at start of cardRAM (b12-31): 299 * It's adress needs to be at a 4kb boundary! */ 300 ACCW(PR_CTX3_R, 0x00000002); /* unknown (looks like this is rubbish/not needed?) */ 301 /* setup set '0' for cmd NV_ROP5_SOLID */ 302 ACCW(PR_CTX0_0, 0x02080043); /* NVclass $043, patchcfg ROP_AND, nv10+: little endian */ 303 ACCW(PR_CTX1_0, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 304 ACCW(PR_CTX2_0, 0x00000000); /* DMA0 and DMA1 instance invalid */ 305 ACCW(PR_CTX3_0, 0x00000000); /* method traps disabled */ 306 ACCW(PR_CTX0_1, 0x00000000); /* extra */ 307 ACCW(PR_CTX1_1, 0x00000000); /* extra */ 308 /* setup set '1' for cmd NV_IMAGE_BLACK_RECTANGLE */ 309 ACCW(PR_CTX0_2, 0x02080019); /* NVclass $019, patchcfg ROP_AND, nv10+: little endian */ 310 ACCW(PR_CTX1_2, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 311 ACCW(PR_CTX2_2, 0x00000000); /* DMA0 and DMA1 instance invalid */ 312 ACCW(PR_CTX3_2, 0x00000000); /* method traps disabled */ 313 ACCW(PR_CTX0_3, 0x00000000); /* extra */ 314 ACCW(PR_CTX1_3, 0x00000000); /* extra */ 315 /* setup set '2' for cmd NV_IMAGE_PATTERN */ 316 ACCW(PR_CTX0_4, 0x02080018); /* NVclass $018, patchcfg ROP_AND, nv10+: little endian */ 317 ACCW(PR_CTX1_4, 0x02000000); /* colorspace not set, notify instance is $0200 (b16-31) */ 318 ACCW(PR_CTX2_4, 0x00000000); /* DMA0 and DMA1 instance invalid */ 319 
ACCW(PR_CTX3_4, 0x00000000); /* method traps disabled */ 320 ACCW(PR_CTX0_5, 0x00000000); /* extra */ 321 ACCW(PR_CTX1_5, 0x00000000); /* extra */ 322 /* setup set '4' for cmd NV12_IMAGE_BLIT */ 323 ACCW(PR_CTX0_6, 0x0208009f); /* NVclass $09f, patchcfg ROP_AND, nv10+: little endian */ 324 ACCW(PR_CTX1_6, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 325 ACCW(PR_CTX2_6, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */ 326 ACCW(PR_CTX3_6, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */ 327 ACCW(PR_CTX0_7, 0x00000000); /* extra */ 328 ACCW(PR_CTX1_7, 0x00000000); /* extra */ 329 /* setup set '5' for cmd NV4_GDI_RECTANGLE_TEXT */ 330 ACCW(PR_CTX0_8, 0x0208004a); /* NVclass $04a, patchcfg ROP_AND, nv10+: little endian */ 331 ACCW(PR_CTX1_8, 0x02000000); /* colorspace not set, notify instance is $0200 (b16-31) */ 332 ACCW(PR_CTX2_8, 0x00000000); /* DMA0 and DMA1 instance invalid */ 333 ACCW(PR_CTX3_8, 0x00000000); /* method traps disabled */ 334 ACCW(PR_CTX0_9, 0x00000000); /* extra */ 335 ACCW(PR_CTX1_9, 0x00000000); /* extra */ 336 /* setup set '6' for cmd NV10_CONTEXT_SURFACES_2D */ 337 ACCW(PR_CTX0_A, 0x02080062); /* NVclass $062, nv10+: little endian */ 338 ACCW(PR_CTX1_A, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 339 ACCW(PR_CTX2_A, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */ 340 ACCW(PR_CTX3_A, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */ 341 ACCW(PR_CTX0_B, 0x00000000); /* extra */ 342 ACCW(PR_CTX1_B, 0x00000000); /* extra */ 343 /* setup set '7' for cmd NV_SCALED_IMAGE_FROM_MEMORY */ 344 ACCW(PR_CTX0_C, 0x02080077); /* NVclass $077, nv10+: little endian */ 345 ACCW(PR_CTX1_C, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 346 ACCW(PR_CTX2_C, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */ 347 ACCW(PR_CTX3_C, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */ 348 ACCW(PR_CTX0_D, 0x00000000); /* 
extra */ 349 ACCW(PR_CTX1_D, 0x00000000); /* extra */ 350 /* setup DMA set pointed at by PF_CACH1_DMAI */ 351 ACCW(PR_CTX0_E, 0x00003002); /* DMA page table present and of linear type; 352 * DMA class is $002 (b0-11); 353 * DMA target node is NVM (non-volatile memory?) 354 * (instead of doing PCI or AGP transfers) */ 355 ACCW(PR_CTX1_E, 0x00007fff); /* DMA limit: tablesize is 32k bytes */ 356 ACCW(PR_CTX2_E, (((si->ps.memory_size - 1) & 0xffff8000) | 0x00000002)); 357 /* DMA access type is READ_AND_WRITE; 358 * table is located at end of cardRAM (b12-31): 359 * It's adress needs to be at a 4kb boundary! */ 360 } 361 else 362 { 363 /* setup a DMA define for use by command defines below. */ 364 ACCW(PR_CTX0_R, 0x00003000); /* DMA page table present and of linear type; 365 * DMA target node is NVM (non-volatile memory?) 366 * (instead of doing PCI or AGP transfers) */ 367 ACCW(PR_CTX1_R, (si->ps.memory_size - 1)); /* DMA limit: size is all cardRAM */ 368 ACCW(PR_CTX2_R, ((0x00000000 & 0xfffff000) | 0x00000002)); 369 /* DMA access type is READ_AND_WRITE; 370 * memory starts at start of cardRAM (b12-31): 371 * It's adress needs to be at a 4kb boundary! */ 372 ACCW(PR_CTX3_R, 0x00000002); /* unknown (looks like this is rubbish/not needed?) 
*/ 373 /* setup set '0' for cmd NV_ROP5_SOLID */ 374 ACCW(PR_CTX0_0, 0x01008043); /* NVclass $043, patchcfg ROP_AND, nv10+: little endian */ 375 ACCW(PR_CTX1_0, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 376 ACCW(PR_CTX2_0, 0x00000000); /* DMA0 and DMA1 instance invalid */ 377 ACCW(PR_CTX3_0, 0x00000000); /* method traps disabled */ 378 /* setup set '1' for cmd NV_IMAGE_BLACK_RECTANGLE */ 379 ACCW(PR_CTX0_1, 0x01008019); /* NVclass $019, patchcfg ROP_AND, nv10+: little endian */ 380 ACCW(PR_CTX1_1, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 381 ACCW(PR_CTX2_1, 0x00000000); /* DMA0 and DMA1 instance invalid */ 382 ACCW(PR_CTX3_1, 0x00000000); /* method traps disabled */ 383 /* setup set '2' for cmd NV_IMAGE_PATTERN */ 384 ACCW(PR_CTX0_2, 0x01008018); /* NVclass $018, patchcfg ROP_AND, nv10+: little endian */ 385 ACCW(PR_CTX1_2, 0x00000002); /* colorspace not set, notify instance is $0200 (b16-31) */ 386 ACCW(PR_CTX2_2, 0x00000000); /* DMA0 and DMA1 instance invalid */ 387 ACCW(PR_CTX3_2, 0x00000000); /* method traps disabled */ 388 /* setup set '3' for ... */ 389 if(si->ps.card_arch >= NV10A) 390 { 391 /* ... cmd NV10_CONTEXT_SURFACES_2D */ 392 ACCW(PR_CTX0_3, 0x01008062); /* NVclass $062, nv10+: little endian */ 393 } 394 else 395 { 396 /* ... cmd NV4_SURFACE */ 397 ACCW(PR_CTX0_3, 0x01008042); /* NVclass $042, nv10+: little endian */ 398 } 399 ACCW(PR_CTX1_3, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 400 ACCW(PR_CTX2_3, 0x11401140); /* DMA0 instance is $1140, DMA1 instance invalid */ 401 ACCW(PR_CTX3_3, 0x00000000); /* method trap 0 is $1140, trap 1 disabled */ 402 /* setup set '4' for ... */ 403 if (si->ps.card_type >= NV11) 404 { 405 /* ... cmd NV12_IMAGE_BLIT */ 406 ACCW(PR_CTX0_4, 0x0100809f); /* NVclass $09f, patchcfg ROP_AND, nv10+: little endian */ 407 } 408 else 409 { 410 /* ... 
cmd NV_IMAGE_BLIT */ 411 ACCW(PR_CTX0_4, 0x0100805f); /* NVclass $05f, patchcfg ROP_AND, nv10+: little endian */ 412 } 413 ACCW(PR_CTX1_4, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 414 ACCW(PR_CTX2_4, 0x11401140); /* DMA0 instance is $1140, DMA1 instance invalid */ 415 ACCW(PR_CTX3_4, 0x00000000); /* method trap 0 is $1140, trap 1 disabled */ 416 /* setup set '5' for cmd NV4_GDI_RECTANGLE_TEXT */ 417 ACCW(PR_CTX0_5, 0x0100804a); /* NVclass $04a, patchcfg ROP_AND, nv10+: little endian */ 418 ACCW(PR_CTX1_5, 0x00000002); /* colorspace not set, notify instance is $0200 (b16-31) */ 419 ACCW(PR_CTX2_5, 0x00000000); /* DMA0 and DMA1 instance invalid */ 420 ACCW(PR_CTX3_5, 0x00000000); /* method traps disabled */ 421 /* setup set '6' ... */ 422 if (si->ps.card_arch >= NV10A) 423 { 424 /* ... for cmd NV10_CONTEXT_SURFACES_ARGB_ZS */ 425 ACCW(PR_CTX0_6, 0x00000093); /* NVclass $093, nv10+: little endian */ 426 } 427 else 428 { 429 /* ... for cmd NV4_CONTEXT_SURFACES_ARGB_ZS */ 430 ACCW(PR_CTX0_6, 0x00000053); /* NVclass $053, nv10+: little endian */ 431 } 432 ACCW(PR_CTX1_6, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 433 ACCW(PR_CTX2_6, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 434 ACCW(PR_CTX3_6, 0x00000000); /* method traps disabled */ 435 /* setup set '7' ... */ 436 if (si->ps.card_arch >= NV10A) 437 { 438 /* ... for cmd NV10_DX5_TEXTURE_TRIANGLE */ 439 ACCW(PR_CTX0_7, 0x0300a094); /* NVclass $094, patchcfg ROP_AND, userclip enable, 440 * context surface0 valid, nv10+: little endian */ 441 } 442 else 443 { 444 /* ... 
for cmd NV4_DX5_TEXTURE_TRIANGLE */ 445 ACCW(PR_CTX0_7, 0x0300a054); /* NVclass $054, patchcfg ROP_AND, userclip enable, 446 * context surface0 valid */ 447 } 448 ACCW(PR_CTX1_7, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 449 ACCW(PR_CTX2_7, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 450 ACCW(PR_CTX3_7, 0x00000000); /* method traps disabled */ 451 /* setup set '8' ... */ 452 if (si->ps.card_arch >= NV10A) 453 { 454 /* ... for cmd NV10_DX6_MULTI_TEXTURE_TRIANGLE (not used) */ 455 ACCW(PR_CTX0_8, 0x0300a095); /* NVclass $095, patchcfg ROP_AND, userclip enable, 456 * context surface0 valid, nv10+: little endian */ 457 } 458 else 459 { 460 /* ... for cmd NV4_DX6_MULTI_TEXTURE_TRIANGLE (not used) */ 461 ACCW(PR_CTX0_8, 0x0300a055); /* NVclass $055, patchcfg ROP_AND, userclip enable, 462 * context surface0 valid */ 463 } 464 ACCW(PR_CTX1_8, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 465 ACCW(PR_CTX2_8, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 466 ACCW(PR_CTX3_8, 0x00000000); /* method traps disabled */ 467 /* setup set '9' for cmd NV_SCALED_IMAGE_FROM_MEMORY */ 468 ACCW(PR_CTX0_9, 0x01018077); /* NVclass $077, patchcfg SRC_COPY, 469 * context surface0 valid, nv10+: little endian */ 470 ACCW(PR_CTX1_9, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 471 ACCW(PR_CTX2_9, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 472 ACCW(PR_CTX3_9, 0x00000000); /* method traps disabled */ 473 /* setup set 'A' for cmd NV1_RENDER_SOLID_LIN (not used) */ 474 ACCW(PR_CTX0_A, 0x0300a01c); /* NVclass $01c, patchcfg ROP_AND, userclip enable, 475 * context surface0 valid, nv10+: little endian */ 476 ACCW(PR_CTX1_A, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 477 ACCW(PR_CTX2_A, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 478 ACCW(PR_CTX3_A, 0x00000000); /* method traps disabled */ 479 //2007 3D tests.. 480 /* setup set 'B' ... 
*/ 481 if (si->ps.card_type == NV15) 482 { 483 /* ... for cmd NV11_TCL_PRIMITIVE_3D */ 484 ACCW(PR_CTX0_B, 0x0300a096); /* NVclass $096, patchcfg ROP_AND, userclip enable, 485 * context surface0 valid, nv10+: little endian */ 486 ACCW(PR_CTX1_B, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 487 ACCW(PR_CTX2_B, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 488 ACCW(PR_CTX3_B, 0x00000000); /* method traps disabled */ 489 } 490 /* setup DMA set pointed at by PF_CACH1_DMAI */ 491 if (si->engine.agp_mode) 492 { 493 /* DMA page table present and of linear type; 494 * DMA class is $002 (b0-11); 495 * DMA target node is AGP */ 496 ACCW(PR_CTX0_C, 0x00033002); 497 } 498 else 499 { 500 /* DMA page table present and of linear type; 501 * DMA class is $002 (b0-11); 502 * DMA target node is PCI */ 503 ACCW(PR_CTX0_C, 0x00023002); 504 } 505 ACCW(PR_CTX1_C, 0x000fffff); /* DMA limit: tablesize is 1M bytes */ 506 ACCW(PR_CTX2_C, (((uint32)((uint8 *)(si->dma_buffer_pci))) | 0x00000002)); 507 /* DMA access type is READ_AND_WRITE; 508 * table is located in main system RAM (b12-31): 509 * It's adress needs to be at a 4kb boundary! */ 510 511 /* set the 3D rendering functions colordepth via BPIXEL's 'depth 2' */ 512 /* note: 513 * setting a depth to 'invalid' (zero) makes the engine report 514 * ready with drawing 'immediately'. */ 515 //fixme: NV30A and above (probably) needs to be corrected... 
516 switch(si->dm.space) 517 { 518 case B_CMAP8: 519 if (si->ps.card_arch < NV30A) 520 /* set depth 2: $1 = Y8 */ 521 ACCW(BPIXEL, 0x00000100); 522 else 523 /* set depth 0-1: $1 = Y8, $2 = X1R5G5B5_Z1R5G5B5 */ 524 ACCW(BPIXEL, 0x00000021); 525 break; 526 case B_RGB15_LITTLE: 527 if (si->ps.card_arch < NV30A) 528 /* set depth 2: $4 = A1R5G5B5 */ 529 ACCW(BPIXEL, 0x00000400); 530 else 531 /* set depth 0-1: $2 = X1R5G5B5_Z1R5G5B5, $4 = A1R5G5B5 */ 532 ACCW(BPIXEL, 0x00000042); 533 break; 534 case B_RGB16_LITTLE: 535 if (si->ps.card_arch < NV30A) 536 /* set depth 2: $5 = R5G6B5 */ 537 ACCW(BPIXEL, 0x00000500); 538 else 539 /* set depth 0-1: $5 = R5G6B5, $a = X1A7R8G8B8_O1A7R8G8B8 */ 540 ACCW(BPIXEL, 0x000000a5); 541 break; 542 case B_RGB32_LITTLE: 543 case B_RGBA32_LITTLE: 544 if (si->ps.card_arch < NV30A) 545 /* set depth 2: $c = A8R8G8B8 */ 546 ACCW(BPIXEL, 0x00000c00); 547 else 548 /* set depth 0-1: $7 = X8R8G8B8_Z8R8G8B8, $e = V8YB8U8YA8 */ 549 ACCW(BPIXEL, 0x000000e7); 550 break; 551 default: 552 LOG(8,("ACC: init, invalid bit depth\n")); 553 return B_ERROR; 554 } 555 } 556 557 if (si->ps.card_arch == NV04A) 558 { 559 /* do a explicit engine reset */ 560 ACCW(DEBUG0, 0x000001ff); 561 562 /* init some function blocks */ 563 /* DEBUG0, b20 and b21 should be high, this has a big influence on 564 * 3D rendering speed! 
(on all cards, confirmed) */ 565 ACCW(DEBUG0, 0x1230c000); 566 /* DEBUG1, b19 = 1 increases 3D rendering speed on TNT2 (M64) a bit, 567 * TNT1 rendering speed stays the same (all cards confirmed) */ 568 ACCW(DEBUG1, 0x72191101); 569 ACCW(DEBUG2, 0x11d5f071); 570 ACCW(DEBUG3, 0x0004ff31); 571 /* init OP methods */ 572 ACCW(DEBUG3, 0x4004ff31); 573 574 /* disable all acceleration engine INT reguests */ 575 ACCW(ACC_INTE, 0x00000000); 576 /* reset all acceration engine INT status bits */ 577 ACCW(ACC_INTS, 0xffffffff); 578 /* context control enabled */ 579 ACCW(NV04_CTX_CTRL, 0x10010100); 580 /* all acceleration buffers, pitches and colors are valid */ 581 ACCW(NV04_ACC_STAT, 0xffffffff); 582 /* enable acceleration engine command FIFO */ 583 ACCW(FIFO_EN, 0x00000001); 584 585 /* setup location of active screen in framebuffer */ 586 ACCW(OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 587 ACCW(OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 588 /* setup accesible card memory range */ 589 ACCW(BLIMIT0, (si->ps.memory_size - 1)); 590 ACCW(BLIMIT1, (si->ps.memory_size - 1)); 591 592 /* pattern shape value = 8x8, 2 color */ 593 //fixme: not needed, unless the engine has a hardware fault (setting via cmd)! 
594 //ACCW(PAT_SHP, 0x00000000); 595 /* Pgraph Beta AND value (fraction) b23-30 */ 596 ACCW(BETA_AND_VAL, 0xffffffff); 597 } 598 else 599 { 600 /* do a explicit engine reset */ 601 ACCW(DEBUG0, 0xffffffff); 602 ACCW(DEBUG0, 0x00000000); 603 /* disable all acceleration engine INT reguests */ 604 ACCW(ACC_INTE, 0x00000000); 605 /* reset all acceration engine INT status bits */ 606 ACCW(ACC_INTS, 0xffffffff); 607 /* context control enabled */ 608 ACCW(NV10_CTX_CTRL, 0x10010100); 609 /* all acceleration buffers, pitches and colors are valid */ 610 ACCW(NV10_ACC_STAT, 0xffffffff); 611 /* enable acceleration engine command FIFO */ 612 ACCW(FIFO_EN, 0x00000001); 613 /* setup surface type: 614 * b1-0 = %01 = surface type is non-swizzle; 615 * this is needed to enable 3D on NV1x (confirmed) and maybe others? */ 616 ACCW(NV10_SURF_TYP, ((ACCR(NV10_SURF_TYP)) & 0x0007ff00)); 617 ACCW(NV10_SURF_TYP, ((ACCR(NV10_SURF_TYP)) | 0x00020101)); 618 } 619 620 if (si->ps.card_arch == NV10A) 621 { 622 /* init some function blocks */ 623 ACCW(DEBUG1, 0x00118700); 624 /* DEBUG2 has a big influence on 3D speed for NV11 and NV15 625 * (confirmed b3 and b18 should both be '1' on both cards!) 626 * (b16 should also be '1', increases 3D speed on NV11 a bit more) */ 627 ACCW(DEBUG2, 0x24fd2ad9); 628 ACCW(DEBUG3, 0x55de0030); 629 /* NV10_DEBUG4 has a big influence on 3D speed for NV11, NV15 and NV18 630 * (confirmed b14 and b15 should both be '1' on these cards!) 631 * (confirmed b8 should be '0' on NV18 to prevent complete engine crash!) 
*/ 632 ACCW(NV10_DEBUG4, 0x0000c000); 633 634 /* copy tile setup stuff from 'source' to acc engine */ 635 for (cnt = 0; cnt < 32; cnt++) 636 { 637 NV_REG32(NVACC_NV10_TIL0AD + (cnt << 2)) = 638 NV_REG32(NVACC_NV10_FBTIL0AD + (cnt << 2)); 639 } 640 641 /* setup location of active screen in framebuffer */ 642 ACCW(OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 643 ACCW(OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 644 /* setup accesible card memory range */ 645 ACCW(BLIMIT0, (si->ps.memory_size - 1)); 646 ACCW(BLIMIT1, (si->ps.memory_size - 1)); 647 648 /* pattern shape value = 8x8, 2 color */ 649 //fixme: not needed, unless the engine has a hardware fault (setting via cmd)! 650 //ACCW(PAT_SHP, 0x00000000); 651 /* Pgraph Beta AND value (fraction) b23-30 */ 652 ACCW(BETA_AND_VAL, 0xffffffff); 653 } 654 655 if (si->ps.card_arch >= NV20A) 656 { 657 switch (si->ps.card_arch) 658 { 659 case NV40A: 660 /* init some function blocks */ 661 ACCW(DEBUG1, 0x401287c0); 662 ACCW(DEBUG3, 0x60de8051); 663 /* disable specific functions, but enable SETUP_SPARE2 register */ 664 ACCW(NV10_DEBUG4, 0x00008000); 665 /* set limit_viol_pix_adress(?): more likely something unknown.. */ 666 ACCW(NV25_WHAT0, 0x00be3c5f); 667 668 /* setup some unknown serially accessed registers (?) */ 669 tmp = (NV_REG32(NV32_NV4X_WHAT0) & 0x000000ff); 670 for (cnt = 0; (tmp && !(tmp & 0x00000001)); tmp >>= 1, cnt++); 671 { 672 ACCW(NV4X_WHAT2, cnt); 673 } 674 675 /* unknown.. 
*/ 676 switch (si->ps.card_type) 677 { 678 case NV40: 679 case NV45: 680 /* and NV48: but these are pgm'd as NV45 currently */ 681 ACCW(NV40_WHAT0, 0x83280fff); 682 ACCW(NV40_WHAT1, 0x000000a0); 683 ACCW(NV40_WHAT2, 0x0078e366); 684 ACCW(NV40_WHAT3, 0x0000014c); 685 break; 686 case NV41: 687 /* and ID == 0x012x: but no cards defined yet */ 688 ACCW(NV40P_WHAT0, 0x83280eff); 689 ACCW(NV40P_WHAT1, 0x000000a0); 690 ACCW(NV40P_WHAT2, 0x007596ff); 691 ACCW(NV40P_WHAT3, 0x00000108); 692 break; 693 case NV43: 694 ACCW(NV40P_WHAT0, 0x83280eff); 695 ACCW(NV40P_WHAT1, 0x000000a0); 696 ACCW(NV40P_WHAT2, 0x0072cb77); 697 ACCW(NV40P_WHAT3, 0x00000108); 698 break; 699 case NV44: 700 case G72: 701 ACCW(NV40P_WHAT0, 0x83280eff); 702 ACCW(NV40P_WHAT1, 0x000000a0); 703 704 NV_REG32(NV32_NV44_WHAT10) = NV_REG32(NV32_NV10STRAPINFO); 705 NV_REG32(NV32_NV44_WHAT11) = 0x00000000; 706 NV_REG32(NV32_NV44_WHAT12) = 0x00000000; 707 NV_REG32(NV32_NV44_WHAT13) = NV_REG32(NV32_NV10STRAPINFO); 708 709 ACCW(NV44_WHAT2, 0x00000000); 710 ACCW(NV44_WHAT3, 0x00000000); 711 break; 712 /* case NV44 type 2: (cardID 0x022x) 713 //fixme if needed: doesn't seem to need the strapinfo thing.. 714 ACCW(NV40P_WHAT0, 0x83280eff); 715 ACCW(NV40P_WHAT1, 0x000000a0); 716 717 ACCW(NV44_WHAT2, 0x00000000); 718 ACCW(NV44_WHAT3, 0x00000000); 719 break; 720 */ case G70: 721 case G71: 722 case G73: 723 ACCW(NV40P_WHAT0, 0x83280eff); 724 ACCW(NV40P_WHAT1, 0x000000a0); 725 ACCW(NV40P_WHAT2, 0x07830610); 726 ACCW(NV40P_WHAT3, 0x0000016a); 727 break; 728 default: 729 ACCW(NV40P_WHAT0, 0x83280eff); 730 ACCW(NV40P_WHAT1, 0x000000a0); 731 break; 732 } 733 734 ACCW(NV10_TIL3PT, 0x2ffff800); 735 ACCW(NV10_TIL3ST, 0x00006000); 736 ACCW(NV4X_WHAT1, 0x01000000); 737 /* engine data source DMA instance = $1140 */ 738 ACCW(NV4X_DMA_SRC, 0x00001140); 739 break; 740 case NV30A: 741 /* init some function blocks, but most is unknown.. 
*/ 742 ACCW(DEBUG1, 0x40108700); 743 ACCW(NV25_WHAT1, 0x00140000); 744 ACCW(DEBUG3, 0xf00e0431); 745 ACCW(NV10_DEBUG4, 0x00008000); 746 ACCW(NV25_WHAT0, 0xf04b1f36); 747 ACCW(NV20_WHAT3, 0x1002d888); 748 ACCW(NV25_WHAT2, 0x62ff007f); 749 break; 750 case NV20A: 751 /* init some function blocks, but most is unknown.. */ 752 ACCW(DEBUG1, 0x00118700); 753 ACCW(DEBUG3, 0xf20e0431); 754 ACCW(NV10_DEBUG4, 0x00000000); 755 ACCW(NV20_WHAT1, 0x00000040); 756 if (si->ps.card_type < NV25) 757 { 758 ACCW(NV20_WHAT2, 0x00080000); 759 ACCW(NV10_DEBUG5, 0x00000005); 760 ACCW(NV20_WHAT3, 0x45caa208); 761 ACCW(NV20_WHAT4, 0x24000000); 762 ACCW(NV20_WHAT5, 0x00000040); 763 764 /* copy some fixed RAM(?) configuration info(?) to some indexed registers: */ 765 /* b16-24 is select; b2-13 is adress in 32-bit words */ 766 ACCW(RDI_INDEX, 0x00e00038); 767 /* data is 32-bit */ 768 ACCW(RDI_DATA, 0x00000030); 769 /* copy some fixed RAM(?) configuration info(?) to some indexed registers: */ 770 /* b16-24 is select; b2-13 is adress in 32-bit words */ 771 ACCW(RDI_INDEX, 0x00e10038); 772 /* data is 32-bit */ 773 ACCW(RDI_DATA, 0x00000030); 774 } 775 else 776 { 777 ACCW(NV25_WHAT1, 0x00080000); 778 ACCW(NV25_WHAT0, 0x304b1fb6); 779 ACCW(NV20_WHAT3, 0x18b82880); 780 ACCW(NV20_WHAT4, 0x44000000); 781 ACCW(NV20_WHAT5, 0x40000080); 782 ACCW(NV25_WHAT2, 0x000000ff); 783 } 784 break; 785 } 786 787 /* NV20A, NV30A and NV40A: */ 788 /* copy tile setup stuff from previous setup 'source' to acc engine 789 * (pattern colorRAM?) 
*/ 790 if ((si->ps.card_type <= NV40) || (si->ps.card_type == NV45)) 791 { 792 for (cnt = 0; cnt < 32; cnt++) 793 { 794 /* copy NV10_FBTIL0AD upto/including NV10_FBTIL7ST */ 795 NV_REG32(NVACC_NV20_WHAT0 + (cnt << 2)) = 796 NV_REG32(NVACC_NV10_FBTIL0AD + (cnt << 2)); 797 798 /* copy NV10_FBTIL0AD upto/including NV10_FBTIL7ST */ 799 NV_REG32(NVACC_NV20_2_WHAT0 + (cnt << 2)) = 800 NV_REG32(NVACC_NV10_FBTIL0AD + (cnt << 2)); 801 } 802 } 803 else 804 { 805 /* NV41, 43, 44, G70 and later */ 806 if (si->ps.card_type >= G70) 807 { 808 for (cnt = 0; cnt < 60; cnt++) 809 { 810 /* copy NV41_FBTIL0AD upto/including G70_FBTILEST */ 811 NV_REG32(NVACC_NV41_WHAT0 + (cnt << 2)) = 812 NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2)); 813 814 /* copy NV41_FBTIL0AD upto/including G70_FBTILEST */ 815 NV_REG32(NVACC_NV20_2_WHAT0 + (cnt << 2)) = 816 NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2)); 817 } 818 } 819 else 820 { 821 /* NV41, 43, 44 */ 822 for (cnt = 0; cnt < 48; cnt++) 823 { 824 /* copy NV41_FBTIL0AD upto/including NV41_FBTILBST */ 825 NV_REG32(NVACC_NV20_WHAT0 + (cnt << 2)) = 826 NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2)); 827 828 if (si->ps.card_type != NV44) 829 { 830 /* copy NV41_FBTIL0AD upto/including NV41_FBTILBST */ 831 NV_REG32(NVACC_NV20_2_WHAT0 + (cnt << 2)) = 832 NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2)); 833 } 834 } 835 } 836 } 837 838 if (si->ps.card_arch >= NV40A) 839 { 840 if ((si->ps.card_type == NV40) || (si->ps.card_type == NV45)) 841 { 842 /* copy some RAM configuration info(?) 
*/ 843 ACCW(NV20_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); 844 ACCW(NV20_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); 845 ACCW(NV40_WHAT_T2, NV_REG32(NV32_PFB_CONFIG_0)); 846 ACCW(NV40_WHAT_T3, NV_REG32(NV32_PFB_CONFIG_1)); 847 848 /* setup location of active screen in framebuffer */ 849 ACCW(NV20_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 850 ACCW(NV20_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 851 /* setup accesible card memory range */ 852 ACCW(NV20_BLIMIT6, (si->ps.memory_size - 1)); 853 ACCW(NV20_BLIMIT7, (si->ps.memory_size - 1)); 854 } 855 else 856 { 857 /* NV41, 43, 44, G70 and later */ 858 859 /* copy some RAM configuration info(?) */ 860 if (si->ps.card_type >= G70) 861 { 862 ACCW(G70_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); 863 ACCW(G70_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); 864 } 865 else 866 { 867 /* NV41, 43, 44 */ 868 ACCW(NV40P_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); 869 ACCW(NV40P_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); 870 } 871 ACCW(NV40P_WHAT_T2, NV_REG32(NV32_PFB_CONFIG_0)); 872 ACCW(NV40P_WHAT_T3, NV_REG32(NV32_PFB_CONFIG_1)); 873 874 /* setup location of active screen in framebuffer */ 875 ACCW(NV40P_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 876 ACCW(NV40P_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 877 /* setup accesible card memory range */ 878 ACCW(NV40P_BLIMIT6, (si->ps.memory_size - 1)); 879 ACCW(NV40P_BLIMIT7, (si->ps.memory_size - 1)); 880 } 881 } 882 else /* NV20A and NV30A: */ 883 { 884 /* copy some RAM configuration info(?) */ 885 ACCW(NV20_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); 886 ACCW(NV20_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); 887 /* copy some RAM configuration info(?) 
to some indexed registers: */ 888 /* b16-24 is select; b2-13 is adress in 32-bit words */ 889 ACCW(RDI_INDEX, 0x00ea0000); 890 /* data is 32-bit */ 891 ACCW(RDI_DATA, NV_REG32(NV32_PFB_CONFIG_0)); 892 /* b16-24 is select; b2-13 is adress in 32-bit words */ 893 ACCW(RDI_INDEX, 0x00ea0004); 894 /* data is 32-bit */ 895 ACCW(RDI_DATA, NV_REG32(NV32_PFB_CONFIG_1)); 896 897 /* setup location of active screen in framebuffer */ 898 ACCW(NV20_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 899 ACCW(NV20_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 900 /* setup accesible card memory range */ 901 ACCW(NV20_BLIMIT6, (si->ps.memory_size - 1)); 902 ACCW(NV20_BLIMIT7, (si->ps.memory_size - 1)); 903 } 904 905 /* NV20A, NV30A and NV40A: */ 906 /* setup some acc engine tile stuff */ 907 ACCW(NV10_TIL2AD, 0x00000000); 908 ACCW(NV10_TIL0ED, 0xffffffff); 909 } 910 911 /* all cards: */ 912 /* setup clipping: rect size is 32768 x 32768, probably max. setting */ 913 /* note: 914 * can also be done via the NV_IMAGE_BLACK_RECTANGLE engine command. */ 915 ACCW(ABS_UCLP_XMIN, 0x00000000); 916 ACCW(ABS_UCLP_YMIN, 0x00000000); 917 ACCW(ABS_UCLP_XMAX, 0x00007fff); 918 ACCW(ABS_UCLP_YMAX, 0x00007fff); 919 920 /* setup sync parameters for NV12_IMAGE_BLIT command for the current mode: 921 * values given are CRTC vertical counter limit values. 
The NV12 command will wait 922 * for the specified's CRTC's vertical counter to be in between the given values */ 923 if (si->ps.card_type >= NV11) 924 { 925 ACCW(NV11_CRTC_LO, si->dm.timing.v_display - 1); 926 ACCW(NV11_CRTC_HI, si->dm.timing.v_display + 1); 927 } 928 929 /*** PFIFO ***/ 930 /* (setup caches) */ 931 /* disable caches reassign */ 932 ACCW(PF_CACHES, 0x00000000); 933 /* PFIFO mode: channel 0 is in DMA mode, channels 1 - 32 are in PIO mode */ 934 ACCW(PF_MODE, 0x00000001); 935 /* cache1 push0 access disabled */ 936 ACCW(PF_CACH1_PSH0, 0x00000000); 937 /* cache1 pull0 access disabled */ 938 ACCW(PF_CACH1_PUL0, 0x00000000); 939 /* cache1 push1 mode = DMA */ 940 if (si->ps.card_arch >= NV40A) 941 ACCW(PF_CACH1_PSH1, 0x00010000); 942 else 943 ACCW(PF_CACH1_PSH1, 0x00000100); 944 /* cache1 DMA Put offset = 0 (b2-28) */ 945 ACCW(PF_CACH1_DMAP, 0x00000000); 946 /* cache1 DMA Get offset = 0 (b2-28) */ 947 ACCW(PF_CACH1_DMAG, 0x00000000); 948 /* cache1 DMA instance adress = $114e (b0-15); 949 * instance being b4-19 with baseadress NV_PRAMIN_CTX_0 (0x00700000). */ 950 /* note: 951 * should point to a DMA definition in CTX register space (which is sort of RAM). 952 * This define tells the engine where the DMA cmd buffer is and what it's size is. 953 * Inside that cmd buffer you'll find the actual issued engine commands. */ 954 if (si->ps.card_arch >= NV40A) 955 ACCW(PF_CACH1_DMAI, 0x00001150); 956 else 957 //2007 3d test.. 958 ACCW(PF_CACH1_DMAI, 0x0000114e); 959 /* cache0 push0 access disabled */ 960 ACCW(PF_CACH0_PSH0, 0x00000000); 961 /* cache0 pull0 access disabled */ 962 ACCW(PF_CACH0_PUL0, 0x00000000); 963 /* RAM HT (hash table) baseadress = $10000 (b4-8), size = 4k, 964 * search = 128 (is byte offset between hash 'sets') */ 965 /* note: 966 * so HT base is $00710000, last is $00710fff. 
967 * In this space you define the engine command handles (HT_HANDL_XX), which 968 * in turn points to the defines in CTX register space (which is sort of RAM) */ 969 ACCW(PF_RAMHT, 0x03000100); 970 /* RAM FC baseadress = $11000 (b3-8) (size is fixed to 0.5k(?)) */ 971 /* note: 972 * so FC base is $00711000, last is $007111ff. (not used?) */ 973 ACCW(PF_RAMFC, 0x00000110); 974 /* RAM RO baseadress = $11200 (b1-8), size = 0.5k */ 975 /* note: 976 * so RO base is $00711200, last is $007113ff. (not used?) */ 977 /* note also: 978 * This means(?) the PRAMIN CTX registers are accessible from base $00711400. */ 979 ACCW(PF_RAMRO, 0x00000112); 980 /* PFIFO size: ch0-15 = 512 bytes, ch16-31 = 124 bytes */ 981 ACCW(PF_SIZE, 0x0000ffff); 982 /* cache1 hash instance = $ffff (b0-15) */ 983 ACCW(PF_CACH1_HASH, 0x0000ffff); 984 /* disable all PFIFO INTs */ 985 ACCW(PF_INTEN, 0x00000000); 986 /* reset all PFIFO INT status bits */ 987 ACCW(PF_INTSTAT, 0xffffffff); 988 /* cache0 pull0 engine = acceleration engine (graphics) */ 989 ACCW(PF_CACH0_PUL1, 0x00000001); 990 /* cache1 DMA control: disable some stuff */ 991 ACCW(PF_CACH1_DMAC, 0x00000000); 992 /* cache1 engine 0 upto/including 7 is software (could also be graphics or DVD) */ 993 ACCW(PF_CACH1_ENG, 0x00000000); 994 /* cache1 DMA fetch: trigger at 128 bytes, size is 32 bytes, max requests is 15, 995 * use little endian */ 996 ACCW(PF_CACH1_DMAF, 0x000f0078); 997 /* cache1 DMA push: b0 = 1: access is enabled */ 998 ACCW(PF_CACH1_DMAS, 0x00000001); 999 /* cache1 push0 access enabled */ 1000 ACCW(PF_CACH1_PSH0, 0x00000001); 1001 /* cache1 pull0 access enabled */ 1002 ACCW(PF_CACH1_PUL0, 0x00000001); 1003 /* cache1 pull1 engine = acceleration engine (graphics) */ 1004 ACCW(PF_CACH1_PUL1, 0x00000001); 1005 /* enable PFIFO caches reassign */ 1006 ACCW(PF_CACHES, 0x00000001); 1007 1008 /* setup 3D specifics */ 1009 nv_init_for_3D_dma(); 1010 1011 /*** init acceleration engine command info ***/ 1012 /* set object handles */ 1013 /* 
note: 1014 * probably depending on some other setup, there are 8 or 32 FIFO channels 1015 * available. Assuming the current setup only has 8 channels because the 'rest' 1016 * isn't setup here... */ 1017 si->engine.fifo.handle[0] = NV_ROP5_SOLID; 1018 si->engine.fifo.handle[1] = NV_IMAGE_BLACK_RECTANGLE; 1019 si->engine.fifo.handle[2] = NV_IMAGE_PATTERN; 1020 si->engine.fifo.handle[3] = NV4_SURFACE; /* NV10_CONTEXT_SURFACES_2D is identical */ 1021 si->engine.fifo.handle[4] = NV_IMAGE_BLIT; 1022 si->engine.fifo.handle[5] = NV4_GDI_RECTANGLE_TEXT; 1023 si->engine.fifo.handle[6] = NV4_CONTEXT_SURFACES_ARGB_ZS;//NV1_RENDER_SOLID_LIN; 1024 si->engine.fifo.handle[7] = NV4_DX5_TEXTURE_TRIANGLE; 1025 /* preset no FIFO channels assigned to cmd's */ 1026 for (cnt = 0; cnt < 0x20; cnt++) 1027 { 1028 si->engine.fifo.ch_ptr[cnt] = 0; 1029 } 1030 /* set handle's pointers to their assigned FIFO channels */ 1031 /* note: 1032 * b0-1 aren't used as adressbits. Using b0 to indicate a valid pointer. */ 1033 for (cnt = 0; cnt < 0x08; cnt++) 1034 { 1035 si->engine.fifo.ch_ptr[(si->engine.fifo.handle[cnt])] = 1036 (0x00000001 + (cnt * 0x00002000)); 1037 } 1038 1039 /*** init DMA command buffer info ***/ 1040 if (si->ps.card_arch >= NV40A) //main mem DMA buf on pre-NV40 1041 { 1042 si->dma_buffer = (void *)((char *)si->framebuffer + 1043 ((si->ps.memory_size - 1) & 0xffff8000)); 1044 } 1045 LOG(4,("ACC_DMA: command buffer is at adress $%08x\n", 1046 ((uint32)(si->dma_buffer)))); 1047 /* we have issued no DMA cmd's to the engine yet */ 1048 si->engine.dma.put = 0; 1049 /* the current first free adress in the DMA buffer is at offset 0 */ 1050 si->engine.dma.current = 0; 1051 /* the DMA buffer can hold 8k 32-bit words (it's 32kb in size), 1052 * or 256k 32-bit words (1Mb in size) dependant on architecture (for now) */ 1053 /* note: 1054 * one word is reserved at the end of the DMA buffer to be able to instruct the 1055 * engine to do a buffer wrap-around! 
1056 * (DMA opcode 'noninc method': issue word $20000000.) */ 1057 if (si->ps.card_arch < NV40A) 1058 si->engine.dma.max = ((1 * 1024 * 1024) >> 2) - 1; 1059 else 1060 si->engine.dma.max = 8192 - 1; 1061 /* note the current free space we have left in the DMA buffer */ 1062 si->engine.dma.free = si->engine.dma.max - si->engine.dma.current; 1063 1064 /*** init FIFO via DMA command buffer. ***/ 1065 /* wait for room in fifo for new FIFO assigment cmds if needed: */ 1066 if (si->ps.card_arch >= NV40A) 1067 { 1068 if (nv_acc_fifofree_dma(12) != B_OK) return B_ERROR; 1069 } 1070 else 1071 { 1072 if (nv_acc_fifofree_dma(16) != B_OK) return B_ERROR; 1073 } 1074 1075 /* program new FIFO assignments */ 1076 /* Raster OPeration: */ 1077 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH0, si->engine.fifo.handle[0]); 1078 /* Clip: */ 1079 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH1, si->engine.fifo.handle[1]); 1080 /* Pattern: */ 1081 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH2, si->engine.fifo.handle[2]); 1082 /* 2D Surfaces: */ 1083 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH3, si->engine.fifo.handle[3]); 1084 /* Blit: */ 1085 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH4, si->engine.fifo.handle[4]); 1086 /* Bitmap: */ 1087 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH5, si->engine.fifo.handle[5]); 1088 if (si->ps.card_arch < NV40A) 1089 { 1090 /* 3D surfaces: (3D related only) */ 1091 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH6, si->engine.fifo.handle[6]); 1092 /* Textured Triangle: (3D only) */ 1093 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH7, si->engine.fifo.handle[7]); 1094 } 1095 1096 /*** Set pixel width ***/ 1097 switch(si->dm.space) 1098 { 1099 case B_CMAP8: 1100 surf_depth = 0x00000001; 1101 cmd_depth = 0x00000003; 1102 break; 1103 case B_RGB15_LITTLE: 1104 case B_RGB16_LITTLE: 1105 surf_depth = 0x00000004; 1106 cmd_depth = 0x00000001; 1107 break; 1108 case B_RGB32_LITTLE: 1109 case B_RGBA32_LITTLE: 1110 surf_depth = 0x00000006; 1111 cmd_depth = 0x00000003; 1112 break; 1113 default: 1114 LOG(8,("ACC_DMA: init, invalid bit 
depth\n")); 1115 return B_ERROR; 1116 } 1117 1118 /* wait for room in fifo for surface setup cmd if needed */ 1119 if (nv_acc_fifofree_dma(5) != B_OK) return B_ERROR; 1120 /* now setup 2D surface (writing 5 32bit words) */ 1121 nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 4); 1122 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = surf_depth; /* Format */ 1123 /* setup screen pitch */ 1124 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1125 ((si->fbc.bytes_per_row & 0x0000ffff) | (si->fbc.bytes_per_row << 16)); /* Pitch */ 1126 /* setup screen location */ 1127 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1128 ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer); /* OffsetSource */ 1129 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1130 ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer); /* OffsetDest */ 1131 1132 /* wait for room in fifo for pattern colordepth setup cmd if needed */ 1133 if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR; 1134 /* set pattern colordepth (writing 2 32bit words) */ 1135 nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETCOLORFORMAT, 1); 1136 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 1137 1138 /* wait for room in fifo for bitmap colordepth setup cmd if needed */ 1139 if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR; 1140 /* set bitmap colordepth (writing 2 32bit words) */ 1141 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_SETCOLORFORMAT, 1); 1142 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 1143 1144 /* Load our pattern into the engine: */ 1145 /* wait for room in fifo for pattern cmd if needed. 
*/
	if (nv_acc_fifofree_dma(7) != B_OK) return B_ERROR;
	/* now setup pattern (writing 7 32bit words):
	 * the SetShape cmd word plus 1 argument, followed by the SetColor0 cmd word
	 * plus 4 arguments. */
	nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETSHAPE, 1);
	((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000000; /* SetShape: 0 = 8x8, 1 = 64x1, 2 = 1x64 */
	nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETCOLOR0, 4);
	((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetColor0 */
	((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetColor1 */
	((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetPattern[0] */
	((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetPattern[1] */

	/* tell the engine to fetch and execute all (new) commands in the DMA buffer */
	nv_start_dma();

	return B_OK;
}

/* Program the PGRAPH registers and the NV10 'pipe' for 3D use.
 * Only acts on NV10A and later architectures; on older cards this function
 * does nothing.
 * note:
 * the writes below are order dependent: every NV10_PIPEADR write selects a
 * pipe address and the NV10_PIPEDAT writes following it fill consecutive
 * entries (PIPEADR is probably auto-incremented, see the note further down).
 * Do not reorder or merge them. */
static void nv_init_for_3D_dma(void)
{
	/* setup PGRAPH unknown registers and modify (pre-cleared) pipe stuff for 3D use */
	if (si->ps.card_arch >= NV10A)
	{
		/* setup unknown PGRAPH stuff */
		ACCW(PGWHAT_00, 0x00000000);
		ACCW(PGWHAT_01, 0x00000000);
		ACCW(PGWHAT_02, 0x00000000);
		ACCW(PGWHAT_03, 0x00000000);

		ACCW(PGWHAT_04, 0x00001000);
		ACCW(PGWHAT_05, 0x00001000);
		ACCW(PGWHAT_06, 0x4003ff80);

		ACCW(PGWHAT_07, 0x00000000);
		ACCW(PGWHAT_08, 0x00000000);
		ACCW(PGWHAT_09, 0x00000000);
		ACCW(PGWHAT_0A, 0x00000000);
		ACCW(PGWHAT_0B, 0x00000000);

		ACCW(PGWHAT_0C, 0x00080008);
		ACCW(PGWHAT_0D, 0x00080008);

		ACCW(PGWHAT_0E, 0x00000000);
		ACCW(PGWHAT_0F, 0x00000000);
		ACCW(PGWHAT_10, 0x00000000);
		ACCW(PGWHAT_11, 0x00000000);
		ACCW(PGWHAT_12, 0x00000000);
		ACCW(PGWHAT_13, 0x00000000);
		ACCW(PGWHAT_14, 0x00000000);
		ACCW(PGWHAT_15, 0x00000000);
		ACCW(PGWHAT_16, 0x00000000);
		ACCW(PGWHAT_17, 0x00000000);
		ACCW(PGWHAT_18, 0x00000000);

		ACCW(PGWHAT_19, 0x10000000);

		ACCW(PGWHAT_1A, 0x00000000);
		ACCW(PGWHAT_1B, 0x00000000);
		ACCW(PGWHAT_1C, 0x00000000);
		ACCW(PGWHAT_1D, 0x00000000);
		ACCW(PGWHAT_1E, 0x00000000);
		ACCW(PGWHAT_1F, 0x00000000);
		ACCW(PGWHAT_20, 0x00000000);
		ACCW(PGWHAT_21, 0x00000000);

		ACCW(PGWHAT_22, 0x08000000);

		ACCW(PGWHAT_23, 0x00000000);
		ACCW(PGWHAT_24, 0x00000000);
		ACCW(PGWHAT_25, 0x00000000);
		ACCW(PGWHAT_26, 0x00000000);

		ACCW(PGWHAT_27, 0x4b7fffff);

		ACCW(PGWHAT_28, 0x00000000);
		ACCW(PGWHAT_29, 0x00000000);
		ACCW(PGWHAT_2A, 0x00000000);

		/* setup window clipping */
		/* b0-11 = min; b16-27 = max.
		 * note:
		 * probably two's complement values, so setting to max range here:
		 * which would be -2048 upto/including +2047. */
		/* horizontal */
		ACCW(WINCLIP_H_0, 0x07ff0800);
		ACCW(WINCLIP_H_1, 0x07ff0800);
		ACCW(WINCLIP_H_2, 0x07ff0800);
		ACCW(WINCLIP_H_3, 0x07ff0800);
		ACCW(WINCLIP_H_4, 0x07ff0800);
		ACCW(WINCLIP_H_5, 0x07ff0800);
		ACCW(WINCLIP_H_6, 0x07ff0800);
		ACCW(WINCLIP_H_7, 0x07ff0800);
		/* vertical */
		ACCW(WINCLIP_V_0, 0x07ff0800);
		ACCW(WINCLIP_V_1, 0x07ff0800);
		ACCW(WINCLIP_V_2, 0x07ff0800);
		ACCW(WINCLIP_V_3, 0x07ff0800);
		ACCW(WINCLIP_V_4, 0x07ff0800);
		ACCW(WINCLIP_V_5, 0x07ff0800);
		ACCW(WINCLIP_V_6, 0x07ff0800);
		ACCW(WINCLIP_V_7, 0x07ff0800);

		/* setup (initialize) pipe:
		 * needed to get valid 3D rendering on (at least) NV1x cards. Without this
		 * those cards produce rubbish instead of 3D, although the engine itself keeps
		 * running and 2D stays OK. */

		/* set eyetype to local, lighting etc. is off */
		ACCW(NV10_XFMOD0, 0x10000000);
		/* disable all lights */
		ACCW(NV10_XFMOD1, 0x00000000);

		/* note: upon writing data into the PIPEDAT register, the PIPEADR is
		 * probably auto-incremented! */
		/* (pipe address = b2-16, pipe data = b0-31) */
		/* note: pipe addresses IGRAPH registers! */
		ACCW(NV10_PIPEADR, 0x00006740);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x3f800000);

		ACCW(NV10_PIPEADR, 0x00006750);
		ACCW(NV10_PIPEDAT, 0x40000000);
		ACCW(NV10_PIPEDAT, 0x40000000);
		ACCW(NV10_PIPEDAT, 0x40000000);
		ACCW(NV10_PIPEDAT, 0x40000000);

		ACCW(NV10_PIPEADR, 0x00006760);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x00006770);
		ACCW(NV10_PIPEDAT, 0xc5000000);
		ACCW(NV10_PIPEDAT, 0xc5000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x00006780);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x000067a0);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x3f800000);

		ACCW(NV10_PIPEADR, 0x00006ab0);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x3f800000);

		ACCW(NV10_PIPEADR, 0x00006ac0);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x00006c10);
		ACCW(NV10_PIPEDAT, 0xbf800000);

		ACCW(NV10_PIPEADR, 0x00007030);
		ACCW(NV10_PIPEDAT, 0x7149f2ca);

		ACCW(NV10_PIPEADR, 0x00007040);
		ACCW(NV10_PIPEDAT, 0x7149f2ca);

		ACCW(NV10_PIPEADR, 0x00007050);
		ACCW(NV10_PIPEDAT, 0x7149f2ca);

		ACCW(NV10_PIPEADR, 0x00007060);
		ACCW(NV10_PIPEDAT, 0x7149f2ca);

		ACCW(NV10_PIPEADR, 0x00007070);
		ACCW(NV10_PIPEDAT, 0x7149f2ca);

		ACCW(NV10_PIPEADR, 0x00007080);
		ACCW(NV10_PIPEDAT, 0x7149f2ca);

		ACCW(NV10_PIPEADR, 0x00007090);
		ACCW(NV10_PIPEDAT, 0x7149f2ca);

		ACCW(NV10_PIPEADR, 0x000070a0);
		ACCW(NV10_PIPEDAT, 0x7149f2ca);

		ACCW(NV10_PIPEADR, 0x00006a80);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x3f800000);

		ACCW(NV10_PIPEADR, 0x00006aa0);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		/* select primitive type that will be drawn (tri's) */
		ACCW(NV10_PIPEADR, 0x00000040);
		ACCW(NV10_PIPEDAT, 0x00000005);

		ACCW(NV10_PIPEADR, 0x00006400);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x4b7fffff);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x00006410);
		ACCW(NV10_PIPEDAT, 0xc5000000);
		ACCW(NV10_PIPEDAT, 0xc5000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x00006420);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x00006430);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x000064c0);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x477fffff);
		ACCW(NV10_PIPEDAT, 0x3f800000);

		ACCW(NV10_PIPEADR, 0x000064d0);
		ACCW(NV10_PIPEDAT, 0xc5000000);
		ACCW(NV10_PIPEDAT, 0xc5000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x000064e0);
		ACCW(NV10_PIPEDAT, 0xc4fff000);
		ACCW(NV10_PIPEDAT, 0xc4fff000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x000064f0);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		/* turn lighting on */
		ACCW(NV10_XFMOD0, 0x30000000);
		/* set light 1 to infinite type, other lights remain off */
		ACCW(NV10_XFMOD1, 0x00000004);

		/* Z-buffer state is:
		 * initialized, set to: 'fixed point' (integer?); Z-buffer; 16bits depth */
		/* note:
		 * other options possible are: floating point; 24bits depth; W-buffer */
		ACCW(GLOB_STAT_0, 0x10000000);
		/* set DMA instance 2 and 3 to be invalid */
		ACCW(GLOB_STAT_1, 0x00000000);
	}
}

/* Hand all commands queued in the DMA command buffer since the previous call
 * over to the engine: dma.current becomes the new dma.put, and that put
 * pointer is written (as a byte offset) to the engine's DMAPUT register.
 * Does nothing if no new words were queued (dma.current == dma.put). */
static void nv_start_dma(void)
{
	/* receives the result of the dummy reads below: only the read access
	 * itself matters, the value is not used */
	uint32 dummy;

	if (si->engine.dma.current != si->engine.dma.put)
	{
		si->engine.dma.put = si->engine.dma.current;
		/* flush used caches so we know for sure the DMA cmd buffer received all data. */
		if (si->ps.card_arch < NV40A)
		{
			/* some CPU's support out-of-order processing (WinChip/Cyrix). Flush them. */
			__asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory");
			/* read a non-cached address to flush the cache */
			dummy = ACCR(STATUS);
		}
		else
		{
			/* dummy read the first address of the framebuffer to flush MTRR-WC buffers */
			dummy = *((volatile uint32 *)(si->framebuffer));
		}

		/* actually start DMA to execute all commands now in buffer */
		/* note:
		 * it doesn't matter which FIFO channel's DMA registers we access, they are in
		 * fact all the same set. It also doesn't matter if the channel was assigned a
		 * command or not. */
		/* note also:
		 * NV_GENERAL_DMAPUT is a write-only register on some cards (confirmed NV11).
*/ 1438 NV_REG32(NVACC_FIFO + NV_GENERAL_DMAPUT) = (si->engine.dma.put << 2); 1439 } 1440 } 1441 1442 /* this routine does not check the engine's internal hardware FIFO, but the DMA 1443 * command buffer. You can see this as a FIFO as well, that feeds the hardware FIFO. 1444 * The hardware FIFO state is checked by the DMA hardware automatically. */ 1445 static status_t nv_acc_fifofree_dma(uint16 cmd_size) 1446 { 1447 uint32 dmaget; 1448 1449 /* we'd better check for timeouts on the DMA engine as it's theoretically 1450 * breakable by malfunctioning software */ 1451 uint16 cnt = 0; 1452 1453 /* check if the DMA buffer has enough room for the command. 1454 * note: 1455 * engine.dma.free is 'cached' */ 1456 while ((si->engine.dma.free < cmd_size) && (cnt < 10000) && (err < 3)) 1457 { 1458 /* see where the engine is currently fetching from the buffer */ 1459 /* note: 1460 * read this only once in the code as accessing registers is relatively slow */ 1461 /* note also: 1462 * it doesn't matter which FIFO channel's DMA registers we access, they are in 1463 * fact all the same set. It also doesn't matter if the channel was assigned a 1464 * command or not. */ 1465 dmaget = ((NV_REG32(NVACC_FIFO + NV_GENERAL_DMAGET)) >> 2); 1466 1467 /* update timeout counter: on NV11 on a Pentium4 2.8Ghz max reached count 1468 * using BeRoMeter 1.2.6 was about 600; so counting 10000 before generating 1469 * a timeout should definately do it. Snooze()-ing cannot be done without a 1470 * serious speed penalty, even if done for only 1 microSecond. */ 1471 cnt++; 1472 1473 /* where's the engine fetching viewed from us issuing? */ 1474 if (si->engine.dma.put >= dmaget) 1475 { 1476 /* engine is fetching 'behind us', the last piece of the buffer is free */ 1477 1478 /* note the 'updated' free space we have in the DMA buffer */ 1479 si->engine.dma.free = si->engine.dma.max - si->engine.dma.current; 1480 /* if it's enough after all we exit this routine immediately. 
Else: */ 1481 if (si->engine.dma.free < cmd_size) 1482 { 1483 /* not enough room left, so instruct DMA engine to reset the buffer 1484 * when it's reaching the end of it */ 1485 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x20000000; 1486 /* reset our buffer pointer, so new commands will be placed at the 1487 * beginning of the buffer. */ 1488 si->engine.dma.current = 0; 1489 /* tell the engine to fetch the remaining command(s) in the DMA buffer 1490 * that where not executed before. */ 1491 nv_start_dma(); 1492 1493 /* NOW the engine is fetching 'in front of us', so the first piece 1494 * of the buffer is free */ 1495 1496 /* note the updated current free space we have in the DMA buffer */ 1497 si->engine.dma.free = dmaget - si->engine.dma.current; 1498 /* mind this pittfall: 1499 * Leave some room between where the engine is fetching and where we 1500 * put new commands. Otherwise the engine will crash on heavy loads. 1501 * A crash can be forced best in 640x480x32 mode with BeRoMeter 1.2.6. 1502 * (confirmed on NV11 and NV43 with less than 256 words forced freespace.) 1503 * Note: 1504 * The engine is DMA triggered for fetching chunks every 128 bytes, 1505 * maybe this is the reason for this behaviour. 1506 * Note also: 1507 * it looks like the space that needs to be kept free is coupled 1508 * with the size of the DMA buffer. */ 1509 if (si->engine.dma.free < 256) 1510 si->engine.dma.free = 0; 1511 else 1512 si->engine.dma.free -= 256; 1513 } 1514 } 1515 else 1516 { 1517 /* engine is fetching 'in front of us', so the first piece of the buffer 1518 * is free */ 1519 1520 /* note the updated current free space we have in the DMA buffer */ 1521 si->engine.dma.free = dmaget - si->engine.dma.current; 1522 /* mind this pittfall: 1523 * Leave some room between where the engine is fetching and where we 1524 * put new commands. Otherwise the engine will crash on heavy loads. 1525 * A crash can be forced best in 640x480x32 mode with BeRoMeter 1.2.6. 
1526 * (confirmed on NV11 and NV43 with less than 256 words forced freespace.) 1527 * Note: 1528 * The engine is DMA triggered for fetching chunks every 128 bytes, 1529 * maybe this is the reason for this behaviour. 1530 * Note also: 1531 * it looks like the space that needs to be kept free is coupled 1532 * with the size of the DMA buffer. */ 1533 if (si->engine.dma.free < 256) 1534 si->engine.dma.free = 0; 1535 else 1536 si->engine.dma.free -= 256; 1537 } 1538 } 1539 1540 /* log timeout if we had one */ 1541 if (cnt == 10000) 1542 { 1543 if (err < 3) err++; 1544 LOG(4,("ACC_DMA: fifofree; DMA timeout #%d, engine trouble!\n", err)); 1545 } 1546 1547 /* we must make the acceleration routines abort or the driver will hang! */ 1548 if (err >= 3) return B_ERROR; 1549 1550 return B_OK; 1551 } 1552 1553 static void nv_acc_cmd_dma(uint32 cmd, uint16 offset, uint16 size) 1554 { 1555 /* NV_FIFO_DMA_OPCODE: set number of cmd words (b18 - 28); set FIFO offset for 1556 * first cmd word (b2 - 15); set DMA opcode = method (b29 - 31). 1557 * a 'NOP' is the opcode word $00000000. */ 1558 /* note: 1559 * possible DMA opcodes: 1560 * b'000' is 'method' (execute cmd); 1561 * b'001' is 'jump'; 1562 * b'002' is 'noninc method' (execute buffer wrap-around); 1563 * b'003' is 'call': return is executed by opcode word $00020000 (b17 = 1). */ 1564 /* note also: 1565 * this system uses auto-increments for the FIFO offset adresses. Make sure 1566 * to set a new adress if a gap exists between the previous one and the new one. 
*/ 1567 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = ((size << 18) | 1568 ((si->engine.fifo.ch_ptr[cmd] + offset) & 0x0000fffc)); 1569 1570 /* space left after issuing the current command is the cmd AND it's arguments less */ 1571 si->engine.dma.free -= (size + 1); 1572 } 1573 1574 static void nv_acc_set_ch_dma(uint16 ch, uint32 handle) 1575 { 1576 /* issue FIFO channel assign cmd */ 1577 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = ((1 << 18) | ch); 1578 /* set new assignment */ 1579 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = (0x80000000 | handle); 1580 1581 /* space left after issuing the current command is the cmd AND it's arguments less */ 1582 si->engine.dma.free -= 2; 1583 } 1584 1585 /* note: 1586 * switching fifo channel assignments this way has no noticable slowdown: 1587 * measured 0.2% with Quake2. */ 1588 void nv_acc_assert_fifo_dma(void) 1589 { 1590 /* does every engine cmd this accelerant needs have a FIFO channel? */ 1591 //fixme: can probably be optimized for both speed and channel selection... 
1592 if (!si->engine.fifo.ch_ptr[NV_ROP5_SOLID] || 1593 !si->engine.fifo.ch_ptr[NV_IMAGE_BLACK_RECTANGLE] || 1594 !si->engine.fifo.ch_ptr[NV_IMAGE_PATTERN] || 1595 !si->engine.fifo.ch_ptr[NV4_SURFACE] || 1596 !si->engine.fifo.ch_ptr[NV_IMAGE_BLIT] || 1597 !si->engine.fifo.ch_ptr[NV4_GDI_RECTANGLE_TEXT] || 1598 !si->engine.fifo.ch_ptr[NV_SCALED_IMAGE_FROM_MEMORY]) 1599 { 1600 uint16 cnt; 1601 1602 /* free the FIFO channels we want from the currently assigned cmd's */ 1603 si->engine.fifo.ch_ptr[si->engine.fifo.handle[0]] = 0; 1604 si->engine.fifo.ch_ptr[si->engine.fifo.handle[1]] = 0; 1605 si->engine.fifo.ch_ptr[si->engine.fifo.handle[2]] = 0; 1606 si->engine.fifo.ch_ptr[si->engine.fifo.handle[3]] = 0; 1607 si->engine.fifo.ch_ptr[si->engine.fifo.handle[4]] = 0; 1608 si->engine.fifo.ch_ptr[si->engine.fifo.handle[5]] = 0; 1609 si->engine.fifo.ch_ptr[si->engine.fifo.handle[6]] = 0; 1610 1611 /* set new object handles */ 1612 si->engine.fifo.handle[0] = NV_ROP5_SOLID; 1613 si->engine.fifo.handle[1] = NV_IMAGE_BLACK_RECTANGLE; 1614 si->engine.fifo.handle[2] = NV_IMAGE_PATTERN; 1615 si->engine.fifo.handle[3] = NV4_SURFACE; 1616 si->engine.fifo.handle[4] = NV_IMAGE_BLIT; 1617 si->engine.fifo.handle[5] = NV4_GDI_RECTANGLE_TEXT; 1618 si->engine.fifo.handle[6] = NV_SCALED_IMAGE_FROM_MEMORY; 1619 1620 /* set handle's pointers to their assigned FIFO channels */ 1621 /* note: 1622 * b0-1 aren't used as adressbits. Using b0 to indicate a valid pointer. */ 1623 for (cnt = 0; cnt < 0x08; cnt++) 1624 { 1625 si->engine.fifo.ch_ptr[(si->engine.fifo.handle[cnt])] = 1626 (0x00000001 + (cnt * 0x00002000)); 1627 } 1628 1629 /* wait for room in fifo for new FIFO assigment cmds if needed. 
*/ 1630 if (nv_acc_fifofree_dma(14) != B_OK) return; 1631 1632 /* program new FIFO assignments */ 1633 /* Raster OPeration: */ 1634 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH0, si->engine.fifo.handle[0]); 1635 /* Clip: */ 1636 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH1, si->engine.fifo.handle[1]); 1637 /* Pattern: */ 1638 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH2, si->engine.fifo.handle[2]); 1639 /* 2D Surface: */ 1640 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH3, si->engine.fifo.handle[3]); 1641 /* Blit: */ 1642 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH4, si->engine.fifo.handle[4]); 1643 /* Bitmap: */ 1644 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH5, si->engine.fifo.handle[5]); 1645 /* Scaled and fitered Blit: */ 1646 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH6, si->engine.fifo.handle[6]); 1647 1648 /* tell the engine to fetch and execute all (new) commands in the DMA buffer */ 1649 nv_start_dma(); 1650 } 1651 } 1652 1653 /* 1654 note: 1655 moved acceleration 'top-level' routines to be integrated in the engine: 1656 it is costly to call the engine for every single function within a loop! 1657 (measured with BeRoMeter 1.2.6: upto 15% speed increase on all CPU's.) 1658 1659 note also: 1660 splitting up each command list into sublists (see routines below) prevents 1661 a lot more nested calls, further increasing the speed with upto 70%. 1662 1663 finally: 1664 sending the sublist to just one single engine command even further increases 1665 speed with upto another 10%. This can't be done for blits though, as this engine- 1666 command's hardware does not support multiple objects. 1667 */ 1668 1669 /* screen to screen blit - i.e. move windows around and scroll within them. */ 1670 void SCREEN_TO_SCREEN_BLIT_DMA(engine_token *et, blit_params *list, uint32 count) 1671 { 1672 uint32 i = 0; 1673 uint16 subcnt; 1674 1675 /*** init acc engine for blit function ***/ 1676 /* ROP registers (Raster OPeration): 1677 * wait for room in fifo for ROP cmd if needed. 
*/ 1678 if (nv_acc_fifofree_dma(2) != B_OK) return; 1679 /* now setup ROP (writing 2 32bit words) for GXcopy */ 1680 nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); 1681 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xcc; /* SetRop5 */ 1682 1683 /*** do each blit ***/ 1684 /* Note: 1685 * blit-copy direction is determined inside nvidia hardware: no setup needed */ 1686 while (count) 1687 { 1688 /* break up the list in sublists to minimize calls, while making sure long 1689 * lists still get executed without trouble */ 1690 subcnt = 32; 1691 if (count < 32) subcnt = count; 1692 count -= subcnt; 1693 1694 /* wait for room in fifo for blit cmd if needed. */ 1695 if (nv_acc_fifofree_dma(4 * subcnt) != B_OK) return; 1696 1697 while (subcnt--) 1698 { 1699 /* now setup blit (writing 4 32bit words) */ 1700 nv_acc_cmd_dma(NV_IMAGE_BLIT, NV_IMAGE_BLIT_SOURCEORG, 3); 1701 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1702 (((list[i].src_top) << 16) | (list[i].src_left)); /* SourceOrg */ 1703 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1704 (((list[i].dest_top) << 16) | (list[i].dest_left)); /* DestOrg */ 1705 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1706 ((((list[i].height) + 1) << 16) | ((list[i].width) + 1)); /* HeightWidth */ 1707 1708 i++; 1709 } 1710 1711 /* tell the engine to fetch the commands in the DMA buffer that where not 1712 * executed before. */ 1713 nv_start_dma(); 1714 } 1715 1716 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 1717 si->engine.threeD.reload = 0xffffffff; 1718 } 1719 1720 /* scaled and filtered screen to screen blit - i.e. video playback without overlay */ 1721 /* note: source and destination may not overlap. */ 1722 //fixme? checkout NV5 and NV10 version of cmd: faster?? (or is 0x77 a 'autoselect' version?) 
1723 void SCREEN_TO_SCREEN_SCALED_FILTERED_BLIT_DMA(engine_token *et, scaled_blit_params *list, uint32 count) 1724 { 1725 uint32 i = 0; 1726 uint16 subcnt; 1727 uint32 cmd_depth; 1728 uint8 bpp; 1729 1730 /*** init acc engine for scaled filtered blit function ***/ 1731 /* Set pixel width */ 1732 switch(si->dm.space) 1733 { 1734 case B_RGB15_LITTLE: 1735 cmd_depth = 0x00000002; 1736 bpp = 2; 1737 break; 1738 case B_RGB16_LITTLE: 1739 cmd_depth = 0x00000007; 1740 bpp = 2; 1741 break; 1742 case B_RGB32_LITTLE: 1743 case B_RGBA32_LITTLE: 1744 cmd_depth = 0x00000004; 1745 bpp = 4; 1746 break; 1747 /* fixme sometime: 1748 * we could do the spaces below if this function would be modified to be able 1749 * to use a source outside of the desktop, i.e. using offscreen bitmaps... */ 1750 case B_YCbCr422: 1751 cmd_depth = 0x00000005; 1752 bpp = 2; 1753 break; 1754 case B_YUV422: 1755 cmd_depth = 0x00000006; 1756 bpp = 2; 1757 break; 1758 default: 1759 /* note: this function does not support src or dest in the B_CMAP8 space! */ 1760 //fixme: the NV10 version of this cmd supports B_CMAP8 src though... (checkout) 1761 LOG(8,("ACC_DMA: scaled_filtered_blit, invalid bit depth\n")); 1762 return; 1763 } 1764 1765 /* modify surface depth settings for 15-bit colorspace so command works as intended */ 1766 if (si->dm.space == B_RGB15_LITTLE) 1767 { 1768 /* wait for room in fifo for surface setup cmd if needed */ 1769 if (nv_acc_fifofree_dma(2) != B_OK) return; 1770 /* now setup 2D surface (writing 1 32bit word) */ 1771 nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 1); 1772 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000002; /* Format */ 1773 } 1774 1775 /* TNT1 has fixed operation mode 'SRCcopy' while the rest can be programmed: */ 1776 if (si->ps.card_type != NV04) 1777 { 1778 /* wait for room in fifo for cmds if needed. 
*/ 1779 if (nv_acc_fifofree_dma(5) != B_OK) return; 1780 /* now setup source bitmap colorspace */ 1781 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SETCOLORFORMAT, 2); 1782 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 1783 /* now setup operation mode to SRCcopy */ 1784 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000003; /* SetOperation */ 1785 } 1786 else 1787 { 1788 /* wait for room in fifo for cmd if needed. */ 1789 if (nv_acc_fifofree_dma(4) != B_OK) return; 1790 /* now setup source bitmap colorspace */ 1791 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SETCOLORFORMAT, 1); 1792 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 1793 /* TNT1 has fixed operation mode SRCcopy */ 1794 } 1795 /* now setup fill color (writing 2 32bit words) */ 1796 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); 1797 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000000; /* Color1A */ 1798 1799 /*** do each blit ***/ 1800 while (count) 1801 { 1802 /* break up the list in sublists to minimize calls, while making sure long 1803 * lists still get executed without trouble */ 1804 subcnt = 16; 1805 if (count < 16) subcnt = count; 1806 count -= subcnt; 1807 1808 /* wait for room in fifo for blit cmd if needed. 
*/ 1809 if (nv_acc_fifofree_dma(12 * subcnt) != B_OK) return; 1810 1811 while (subcnt--) 1812 { 1813 /* now setup blit (writing 12 32bit words) */ 1814 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCEORG, 6); 1815 /* setup dest clipping ref for blit (not used) (b0-15 = left, b16-31 = top) */ 1816 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0; /* SourceOrg */ 1817 /* setup dest clipping size for blit */ 1818 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1819 (((list[i].dest_height + 1) << 16) | (list[i].dest_width + 1)); /* SourceHeightWidth */ 1820 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1821 /* setup destination location and size for blit */ 1822 (((list[i].dest_top) << 16) | (list[i].dest_left)); /* DestOrg */ 1823 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1824 (((list[i].dest_height + 1) << 16) | (list[i].dest_width + 1)); /* DestHeightWidth */ 1825 //fixme: findout scaling limits... (although the current cmd interface doesn't support them.) 1826 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1827 (((list[i].src_width + 1) << 20) / (list[i].dest_width + 1)); /* HorInvScale (in 12.20 format) */ 1828 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1829 (((list[i].src_height + 1) << 20) / (list[i].dest_height + 1)); /* VerInvScale (in 12.20 format) */ 1830 1831 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCESIZE, 4); 1832 /* setup horizontal and vertical source (fetching) ends. 1833 * note: 1834 * horizontal granularity is 2 pixels, vertical granularity is 1 pixel. 1835 * look at Matrox or Neomagic bes engines code for usage example. */ 1836 //fixme: tested 15, 16 and 32-bit RGB depth, verify other depths... 1837 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1838 (((list[i].src_height + 1) << 16) | 1839 (((list[i].src_width + 1) + 0x0001) & ~0x0001)); /* SourceHeightWidth */ 1840 /* setup source pitch (b0-15). 
Set 'format origin center' (b16-17) and 1841 * select 'format interpolator foh (bilinear filtering)' (b24). */ 1842 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1843 (si->fbc.bytes_per_row | (1 << 16) | (1 << 24)); /* SourcePitch */ 1844 /* setup source surface location */ 1845 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1846 ((uint32)((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)) + 1847 (list[i].src_top * si->fbc.bytes_per_row) + (list[i].src_left * bpp); /* Offset */ 1848 /* setup source start: first (sub)pixel contributing to output picture */ 1849 /* note: 1850 * clipping is not asked for. 1851 * look at nVidia NV10+ bes engine code for useage example. */ 1852 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1853 0; /* SourceRef (b0-15 = hor, b16-31 = ver: both in 12.4 format) */ 1854 1855 i++; 1856 } 1857 1858 /* tell the engine to fetch the commands in the DMA buffer that where not 1859 * executed before. */ 1860 nv_start_dma(); 1861 } 1862 1863 /* reset surface depth settings so the other engine commands works as intended */ 1864 if (si->dm.space == B_RGB15_LITTLE) 1865 { 1866 /* wait for room in fifo for surface setup cmd if needed */ 1867 if (nv_acc_fifofree_dma(2) != B_OK) return; 1868 /* now setup 2D surface (writing 1 32bit word) */ 1869 nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 1); 1870 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000004; /* Format */ 1871 1872 /* tell the engine to fetch the commands in the DMA buffer that where not 1873 * executed before. */ 1874 nv_start_dma(); 1875 } 1876 1877 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 1878 si->engine.threeD.reload = 0xffffffff; 1879 } 1880 1881 /* rectangle fill - i.e. 
workspace and window background color */ 1882 void FILL_RECTANGLE_DMA(engine_token *et, uint32 colorIndex, fill_rect_params *list, uint32 count) 1883 { 1884 uint32 i = 0; 1885 uint16 subcnt; 1886 1887 /*** init acc engine for fill function ***/ 1888 /* ROP registers (Raster OPeration): 1889 * wait for room in fifo for ROP and bitmap cmd if needed. */ 1890 if (nv_acc_fifofree_dma(4) != B_OK) return; 1891 /* now setup ROP (writing 2 32bit words) for GXcopy */ 1892 nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); 1893 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xcc; /* SetRop5 */ 1894 /* now setup fill color (writing 2 32bit words) */ 1895 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); 1896 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = colorIndex; /* Color1A */ 1897 1898 /*** draw each rectangle ***/ 1899 while (count) 1900 { 1901 /* break up the list in sublists to minimize calls, while making sure long 1902 * lists still get executed without trouble */ 1903 subcnt = 32; 1904 if (count < 32) subcnt = count; 1905 count -= subcnt; 1906 1907 /* wait for room in fifo for bitmap cmd if needed. */ 1908 if (nv_acc_fifofree_dma(1 + (2 * subcnt)) != B_OK) return; 1909 1910 /* issue fill command once... */ 1911 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, (2 * subcnt)); 1912 /* ... and send multiple rects (engine cmd supports 32 max) */ 1913 while (subcnt--) 1914 { 1915 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1916 (((list[i].left) << 16) | ((list[i].top) & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */ 1917 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1918 (((((list[i].right)+1) - (list[i].left)) << 16) | 1919 (((list[i].bottom-list[i].top)+1) & 0x0000ffff)); /* Unclipped Rect 0 WidthHeight */ 1920 1921 i++; 1922 } 1923 1924 /* tell the engine to fetch the commands in the DMA buffer that where not 1925 * executed before. 
*/ 1926 nv_start_dma(); 1927 } 1928 1929 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 1930 si->engine.threeD.reload = 0xffffffff; 1931 } 1932 1933 /* span fill - i.e. (selected) menuitem background color (Dano) */ 1934 void FILL_SPAN_DMA(engine_token *et, uint32 colorIndex, uint16 *list, uint32 count) 1935 { 1936 uint32 i = 0; 1937 uint16 subcnt; 1938 1939 /*** init acc engine for fill function ***/ 1940 /* ROP registers (Raster OPeration): 1941 * wait for room in fifo for ROP and bitmap cmd if needed. */ 1942 if (nv_acc_fifofree_dma(4) != B_OK) return; 1943 /* now setup ROP (writing 2 32bit words) for GXcopy */ 1944 nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); 1945 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xcc; /* SetRop5 */ 1946 /* now setup fill color (writing 2 32bit words) */ 1947 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); 1948 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = colorIndex; /* Color1A */ 1949 1950 /*** draw each span ***/ 1951 while (count) 1952 { 1953 /* break up the list in sublists to minimize calls, while making sure long 1954 * lists still get executed without trouble */ 1955 subcnt = 32; 1956 if (count < 32) subcnt = count; 1957 count -= subcnt; 1958 1959 /* wait for room in fifo for bitmap cmd if needed. */ 1960 if (nv_acc_fifofree_dma(1 + (2 * subcnt)) != B_OK) return; 1961 1962 /* issue fill command once... */ 1963 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, (2 * subcnt)); 1964 /* ... 
and send multiple rects (spans) (engine cmd supports 32 max) */ 1965 while (subcnt--) 1966 { 1967 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1968 (((list[i+1]) << 16) | ((list[i]) & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */ 1969 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1970 ((((list[i+2]+1) - (list[i+1])) << 16) | 0x00000001); /* Unclipped Rect 0 WidthHeight */ 1971 1972 i+=3; 1973 } 1974 1975 /* tell the engine to fetch the commands in the DMA buffer that where not 1976 * executed before. */ 1977 nv_start_dma(); 1978 } 1979 1980 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 1981 si->engine.threeD.reload = 0xffffffff; 1982 } 1983 1984 /* rectangle invert - i.e. text cursor and text selection */ 1985 void INVERT_RECTANGLE_DMA(engine_token *et, fill_rect_params *list, uint32 count) 1986 { 1987 uint32 i = 0; 1988 uint16 subcnt; 1989 1990 /*** init acc engine for invert function ***/ 1991 /* ROP registers (Raster OPeration): 1992 * wait for room in fifo for ROP and bitmap cmd if needed. */ 1993 if (nv_acc_fifofree_dma(4) != B_OK) return; 1994 /* now setup ROP (writing 2 32bit words) for GXinvert */ 1995 nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); 1996 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x55; /* SetRop5 */ 1997 /* now reset fill color (writing 2 32bit words) */ 1998 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); 1999 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000000; /* Color1A */ 2000 2001 /*** invert each rectangle ***/ 2002 while (count) 2003 { 2004 /* break up the list in sublists to minimize calls, while making sure long 2005 * lists still get executed without trouble */ 2006 subcnt = 32; 2007 if (count < 32) subcnt = count; 2008 count -= subcnt; 2009 2010 /* wait for room in fifo for bitmap cmd if needed. */ 2011 if (nv_acc_fifofree_dma(1 + (2 * subcnt)) != B_OK) return; 2012 2013 /* issue fill command once... 
*/ 2014 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, (2 * subcnt)); 2015 /* ... and send multiple rects (engine cmd supports 32 max) */ 2016 while (subcnt--) 2017 { 2018 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2019 (((list[i].left) << 16) | ((list[i].top) & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */ 2020 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2021 (((((list[i].right)+1) - (list[i].left)) << 16) | 2022 (((list[i].bottom-list[i].top)+1) & 0x0000ffff)); /* Unclipped Rect 0 WidthHeight */ 2023 2024 i++; 2025 } 2026 2027 /* tell the engine to fetch the commands in the DMA buffer that where not 2028 * executed before. */ 2029 nv_start_dma(); 2030 } 2031 2032 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 2033 si->engine.threeD.reload = 0xffffffff; 2034 } 2035