1 /* NV Acceleration functions */ 2 3 /* Author: 4 Rudolf Cornelissen 8/2003-4/2006. 5 6 This code was possible thanks to: 7 - the Linux XFree86 NV driver, 8 - the Linux UtahGLX 3D driver. 9 */ 10 11 #define MODULE_BIT 0x00080000 12 13 #include "nv_std.h" 14 15 /*acceleration notes*/ 16 17 /*functions Be's app_server uses: 18 fill span (horizontal only) 19 fill rectangle (these 2 are very similar) 20 invert rectangle 21 blit 22 */ 23 24 static void nv_init_for_3D_dma(void); 25 static void nv_start_dma(void); 26 static status_t nv_acc_fifofree_dma(uint16 cmd_size); 27 static void nv_acc_cmd_dma(uint32 cmd, uint16 offset, uint16 size); 28 static void nv_acc_set_ch_dma(uint16 ch, uint32 handle); 29 30 /* used to track engine DMA stalls */ 31 static uint8 err; 32 33 /* wait until engine completely idle */ 34 status_t nv_acc_wait_idle_dma() 35 { 36 /* we'd better check for timeouts on the DMA engine as it's theoretically 37 * breakable by malfunctioning software */ 38 uint16 cnt = 0; 39 40 /* wait until all upcoming commands are in execution at least. Do this until 41 * we hit a timeout; abort if we failed at least three times before: 42 * if DMA stalls, we have to forget about it alltogether at some point, or 43 * the system will almost come to a complete halt.. */ 44 /* note: 45 * it doesn't matter which FIFO channel's DMA registers we access, they are in 46 * fact all the same set. It also doesn't matter if the channel was assigned a 47 * command or not. */ 48 while ((NV_REG32(NVACC_FIFO + NV_GENERAL_DMAGET) != (si->engine.dma.put << 2)) && 49 (cnt < 10000) && (err < 3)) 50 { 51 /* snooze a bit so I do not hammer the bus */ 52 snooze (100); 53 cnt++; 54 } 55 56 /* log timeout if we had one */ 57 if (cnt == 10000) 58 { 59 if (err < 3) err++; 60 LOG(4,("ACC_DMA: wait_idle; DMA timeout #%d, engine trouble!\n", err)); 61 } 62 63 /* wait until execution completed */ 64 while (ACCR(STATUS)) 65 { 66 /* snooze a bit so I do not hammer the bus */ 67 snooze (100); 68 } 69 70 return B_OK; 71 } 72 73 /* AFAIK this must be done for every new screenmode. 74 * Engine required init. */ 75 status_t nv_acc_init_dma() 76 { 77 uint32 cnt, tmp; 78 uint32 surf_depth, cmd_depth; 79 /* reset the engine DMA stalls counter */ 80 err = 0; 81 82 /* a hanging engine only recovers from a complete power-down/power-up cycle */ 83 NV_REG32(NV32_PWRUPCTRL) = 0x13110011; 84 snooze(1000); 85 NV_REG32(NV32_PWRUPCTRL) = 0x13111111; 86 87 /* don't try this on NV20 and later.. */ 88 /* note: 89 * the specific register that's responsible for the speedfix on NV18 is 90 * $00400ed8: bit 6 needs to be zero for fastest rendering (confirmed). */ 91 /* note also: 92 * on NV28 the following ranges could be reset (confirmed): 93 * $00400000 upto/incl. $004002fc; 94 * $00400400 upto/incl. $004017fc; 95 * $0040180c upto/incl. $00401948; 96 * $00401994 upto/incl. $00401a80; 97 * $00401a94 upto/incl. $00401ffc. 98 * The intermediate ranges hang the engine upon resetting. */ 99 if (si->ps.card_arch < NV20A) 100 { 101 /* actively reset the PGRAPH registerset (acceleration engine) */ 102 for (cnt = 0x00400000; cnt < 0x00402000; cnt +=4) 103 { 104 NV_REG32(cnt) = 0x00000000; 105 } 106 } 107 108 /* setup PTIMER: */ 109 //fixme? how about NV28 setup as just after coldstarting? (see nv_info.c) 110 /* set timer numerator to 8 (in b0-15) */ 111 ACCW(PT_NUMERATOR, 0x00000008); 112 /* set timer denominator to 3 (in b0-15) */ 113 ACCW(PT_DENOMINATR, 0x00000003); 114 115 /* disable timer-alarm INT requests (b0) */ 116 ACCW(PT_INTEN, 0x00000000); 117 /* reset timer-alarm INT status bit (b0) */ 118 ACCW(PT_INTSTAT, 0xffffffff); 119 120 /* enable PRAMIN write access on pre NV10 before programming it! */ 121 if (si->ps.card_arch == NV04A) 122 { 123 /* set framebuffer config: type = notiling, PRAMIN write access enabled */ 124 NV_REG32(NV32_PFB_CONFIG_0) = 0x00001114; 125 } 126 else 127 { 128 /* setup acc engine 'source' tile adressranges */ 129 if ((si->ps.card_type <= NV40) || (si->ps.card_type == NV45)) 130 { 131 ACCW(NV10_FBTIL0AD, 0); 132 ACCW(NV10_FBTIL1AD, 0); 133 ACCW(NV10_FBTIL2AD, 0); 134 ACCW(NV10_FBTIL3AD, 0); 135 ACCW(NV10_FBTIL4AD, 0); 136 ACCW(NV10_FBTIL5AD, 0); 137 ACCW(NV10_FBTIL6AD, 0); 138 ACCW(NV10_FBTIL7AD, 0); 139 ACCW(NV10_FBTIL0ED, (si->ps.memory_size - 1)); 140 ACCW(NV10_FBTIL1ED, (si->ps.memory_size - 1)); 141 ACCW(NV10_FBTIL2ED, (si->ps.memory_size - 1)); 142 ACCW(NV10_FBTIL3ED, (si->ps.memory_size - 1)); 143 ACCW(NV10_FBTIL4ED, (si->ps.memory_size - 1)); 144 ACCW(NV10_FBTIL5ED, (si->ps.memory_size - 1)); 145 ACCW(NV10_FBTIL6ED, (si->ps.memory_size - 1)); 146 ACCW(NV10_FBTIL7ED, (si->ps.memory_size - 1)); 147 } 148 else 149 { 150 /* NV41, 43, 44, G70 and up */ 151 ACCW(NV41_FBTIL0AD, 0); 152 ACCW(NV41_FBTIL1AD, 0); 153 ACCW(NV41_FBTIL2AD, 0); 154 ACCW(NV41_FBTIL3AD, 0); 155 ACCW(NV41_FBTIL4AD, 0); 156 ACCW(NV41_FBTIL5AD, 0); 157 ACCW(NV41_FBTIL6AD, 0); 158 ACCW(NV41_FBTIL7AD, 0); 159 ACCW(NV41_FBTIL8AD, 0); 160 ACCW(NV41_FBTIL9AD, 0); 161 ACCW(NV41_FBTILAAD, 0); 162 ACCW(NV41_FBTILBAD, 0); 163 ACCW(NV41_FBTIL0ED, (si->ps.memory_size - 1)); 164 ACCW(NV41_FBTIL1ED, (si->ps.memory_size - 1)); 165 ACCW(NV41_FBTIL2ED, (si->ps.memory_size - 1)); 166 ACCW(NV41_FBTIL3ED, (si->ps.memory_size - 1)); 167 ACCW(NV41_FBTIL4ED, (si->ps.memory_size - 1)); 168 ACCW(NV41_FBTIL5ED, (si->ps.memory_size - 1)); 169 ACCW(NV41_FBTIL6ED, (si->ps.memory_size - 1)); 170 ACCW(NV41_FBTIL7ED, (si->ps.memory_size - 1)); 171 ACCW(NV41_FBTIL8ED, (si->ps.memory_size - 1)); 172 ACCW(NV41_FBTIL9ED, (si->ps.memory_size - 1)); 173 ACCW(NV41_FBTILAED, (si->ps.memory_size - 1)); 174 ACCW(NV41_FBTILBED, (si->ps.memory_size - 1)); 175 176 if (si->ps.card_type >= G70) 177 { 178 ACCW(G70_FBTILCAD, 0); 179 ACCW(G70_FBTILDAD, 0); 180 ACCW(G70_FBTILEAD, 0); 181 ACCW(G70_FBTILCED, (si->ps.memory_size - 1)); 182 ACCW(G70_FBTILDED, (si->ps.memory_size - 1)); 183 ACCW(G70_FBTILEED, (si->ps.memory_size - 1)); 184 } 185 } 186 } 187 188 /*** PRAMIN ***/ 189 /* first clear the entire RAMHT (hash-table) space to a defined state. It turns 190 * out at least NV11 will keep the previously programmed handles over resets and 191 * power-outages upto about 15 seconds!! Faulty entries might well hang the 192 * engine (confirmed on NV11). 193 * Note: 194 * this behaviour is not very strange: even very old DRAM chips are known to be 195 * able to do this, even though you should refresh them every few milliseconds or 196 * so. (Large memory cell capacitors, though different cells vary a lot in their 197 * capacity.) 198 * Of course data validity is not certain by a long shot over this large 199 * amount of time.. */ 200 for(cnt = 0; cnt < 0x0400; cnt++) 201 NV_REG32(NVACC_HT_HANDL_00 + (cnt << 2)) = 0; 202 /* RAMHT (hash-table) space SETUP FIFO HANDLES */ 203 /* note: 204 * 'instance' tells you where the engine command is stored in 'PR_CTXx_x' sets 205 * below: instance being b4-19 with baseadress NV_PRAMIN_CTX_0 (0x00700000). 206 * That command is linked to the handle noted here. This handle is then used to 207 * tell the FIFO to which engine command it is connected! 208 * (CTX registers are actually a sort of RAM space.) */ 209 if (si->ps.card_arch >= NV40A) 210 { 211 /* (first set) */ 212 ACCW(HT_HANDL_00, (0x80000000 | NV10_CONTEXT_SURFACES_2D)); /* 32bit handle (not used) */ 213 ACCW(HT_VALUE_00, 0x0010114c); /* instance $114c, engine = acc engine, CHID = $00 */ 214 215 ACCW(HT_HANDL_01, (0x80000000 | NV_IMAGE_BLIT)); /* 32bit handle */ 216 ACCW(HT_VALUE_01, 0x00101148); /* instance $1148, engine = acc engine, CHID = $00 */ 217 218 ACCW(HT_HANDL_02, (0x80000000 | NV4_GDI_RECTANGLE_TEXT)); /* 32bit handle */ 219 ACCW(HT_VALUE_02, 0x0010114a); /* instance $114a, engine = acc engine, CHID = $00 */ 220 221 /* (second set) */ 222 ACCW(HT_HANDL_10, (0x80000000 | NV_ROP5_SOLID)); /* 32bit handle */ 223 ACCW(HT_VALUE_10, 0x00101142); /* instance $1142, engine = acc engine, CHID = $00 */ 224 225 ACCW(HT_HANDL_11, (0x80000000 | NV_IMAGE_BLACK_RECTANGLE)); /* 32bit handle */ 226 ACCW(HT_VALUE_11, 0x00101144); /* instance $1144, engine = acc engine, CHID = $00 */ 227 228 ACCW(HT_HANDL_12, (0x80000000 | NV_IMAGE_PATTERN)); /* 32bit handle */ 229 ACCW(HT_VALUE_12, 0x00101146); /* instance $1146, engine = acc engine, CHID = $00 */ 230 231 ACCW(HT_HANDL_13, (0x80000000 | NV_SCALED_IMAGE_FROM_MEMORY)); /* 32bit handle */ 232 ACCW(HT_VALUE_13, 0x0010114e); /* instance $114e, engine = acc engine, CHID = $00 */ 233 } 234 else 235 { 236 /* (first set) */ 237 ACCW(HT_HANDL_00, (0x80000000 | NV4_SURFACE)); /* 32bit handle */ 238 ACCW(HT_VALUE_00, 0x80011145); /* instance $1145, engine = acc engine, CHID = $00 */ 239 240 ACCW(HT_HANDL_01, (0x80000000 | NV_IMAGE_BLIT)); /* 32bit handle */ 241 ACCW(HT_VALUE_01, 0x80011146); /* instance $1146, engine = acc engine, CHID = $00 */ 242 243 ACCW(HT_HANDL_02, (0x80000000 | NV4_GDI_RECTANGLE_TEXT)); /* 32bit handle */ 244 ACCW(HT_VALUE_02, 0x80011147); /* instance $1147, engine = acc engine, CHID = $00 */ 245 246 ACCW(HT_HANDL_03, (0x80000000 | NV4_CONTEXT_SURFACES_ARGB_ZS)); /* 32bit handle (3D) */ 247 ACCW(HT_VALUE_03, 0x80011148); /* instance $1148, engine = acc engine, CHID = $00 */ 248 249 /* NV4_ and NV10_DX5_TEXTURE_TRIANGLE should be identical */ 250 ACCW(HT_HANDL_04, (0x80000000 | NV4_DX5_TEXTURE_TRIANGLE)); /* 32bit handle (3D) */ 251 ACCW(HT_VALUE_04, 0x80011149); /* instance $1149, engine = acc engine, CHID = $00 */ 252 253 /* NV4_ and NV10_DX6_MULTI_TEXTURE_TRIANGLE should be identical */ 254 ACCW(HT_HANDL_05, (0x80000000 | NV4_DX6_MULTI_TEXTURE_TRIANGLE)); /* 32bit handle (not used) */ 255 ACCW(HT_VALUE_05, 0x8001114a); /* instance $114a, engine = acc engine, CHID = $00 */ 256 257 ACCW(HT_HANDL_06, (0x80000000 | NV1_RENDER_SOLID_LIN)); /* 32bit handle (not used) */ 258 ACCW(HT_VALUE_06, 0x8001114c); /* instance $114c, engine = acc engine, CHID = $00 */ 259 260 /* (second set) */ 261 ACCW(HT_HANDL_10, (0x80000000 | NV_ROP5_SOLID)); /* 32bit handle */ 262 ACCW(HT_VALUE_10, 0x80011142); /* instance $1142, engine = acc engine, CHID = $00 */ 263 264 ACCW(HT_HANDL_11, (0x80000000 | NV_IMAGE_BLACK_RECTANGLE)); /* 32bit handle */ 265 ACCW(HT_VALUE_11, 0x80011143); /* instance $1143, engine = acc engine, CHID = $00 */ 266 267 ACCW(HT_HANDL_12, (0x80000000 | NV_IMAGE_PATTERN)); /* 32bit handle */ 268 ACCW(HT_VALUE_12, 0x80011144); /* instance $1144, engine = acc engine, CHID = $00 */ 269 270 ACCW(HT_HANDL_13, (0x80000000 | NV_SCALED_IMAGE_FROM_MEMORY)); /* 32bit handle */ 271 ACCW(HT_VALUE_13, 0x8001114b); /* instance $114b, engine = acc engine, CHID = $00 */ 272 } 273 274 /* program CTX registers: CTX1 is mostly done later (colorspace dependant) */ 275 /* note: 276 * CTX determines which HT handles point to what engine commands. */ 277 /* note also: 278 * CTX registers are in fact in the same GPU internal RAM space as the engine's 279 * hashtable. This means that stuff programmed in here also survives resets and 280 * power-outages! (confirmed NV11) */ 281 if (si->ps.card_arch >= NV40A) 282 { 283 /* setup a DMA define for use by command defines below. */ 284 ACCW(PR_CTX0_R, 0x00003000); /* DMA page table present and of linear type; 285 * DMA target node is NVM (non-volatile memory?) 286 * (instead of doing PCI or AGP transfers) */ 287 ACCW(PR_CTX1_R, (si->ps.memory_size - 1)); /* DMA limit: size is all cardRAM */ 288 ACCW(PR_CTX2_R, ((0x00000000 & 0xfffff000) | 0x00000002)); 289 /* DMA access type is READ_AND_WRITE; 290 * memory starts at start of cardRAM (b12-31): 291 * It's adress needs to be at a 4kb boundary! */ 292 ACCW(PR_CTX3_R, 0x00000002); /* unknown (looks like this is rubbish/not needed?) */ 293 /* setup set '0' for cmd NV_ROP5_SOLID */ 294 ACCW(PR_CTX0_0, 0x02080043); /* NVclass $043, patchcfg ROP_AND, nv10+: little endian */ 295 ACCW(PR_CTX1_0, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 296 ACCW(PR_CTX2_0, 0x00000000); /* DMA0 and DMA1 instance invalid */ 297 ACCW(PR_CTX3_0, 0x00000000); /* method traps disabled */ 298 ACCW(PR_CTX0_1, 0x00000000); /* extra */ 299 ACCW(PR_CTX1_1, 0x00000000); /* extra */ 300 /* setup set '1' for cmd NV_IMAGE_BLACK_RECTANGLE */ 301 ACCW(PR_CTX0_2, 0x02080019); /* NVclass $019, patchcfg ROP_AND, nv10+: little endian */ 302 ACCW(PR_CTX1_2, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 303 ACCW(PR_CTX2_2, 0x00000000); /* DMA0 and DMA1 instance invalid */ 304 ACCW(PR_CTX3_2, 0x00000000); /* method traps disabled */ 305 ACCW(PR_CTX0_3, 0x00000000); /* extra */ 306 ACCW(PR_CTX1_3, 0x00000000); /* extra */ 307 /* setup set '2' for cmd NV_IMAGE_PATTERN */ 308 ACCW(PR_CTX0_4, 0x02080018); /* NVclass $018, patchcfg ROP_AND, nv10+: little endian */ 309 ACCW(PR_CTX1_4, 0x02000000); /* colorspace not set, notify instance is $0200 (b16-31) */ 310 ACCW(PR_CTX2_4, 0x00000000); /* DMA0 and DMA1 instance invalid */ 311 ACCW(PR_CTX3_4, 0x00000000); /* method traps disabled */ 312 ACCW(PR_CTX0_5, 0x00000000); /* extra */ 313 ACCW(PR_CTX1_5, 0x00000000); /* extra */ 314 /* setup set '4' for cmd NV12_IMAGE_BLIT */ 315 ACCW(PR_CTX0_6, 0x0208009f); /* NVclass $09f, patchcfg ROP_AND, nv10+: little endian */ 316 ACCW(PR_CTX1_6, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 317 ACCW(PR_CTX2_6, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */ 318 ACCW(PR_CTX3_6, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */ 319 ACCW(PR_CTX0_7, 0x00000000); /* extra */ 320 ACCW(PR_CTX1_7, 0x00000000); /* extra */ 321 /* setup set '5' for cmd NV4_GDI_RECTANGLE_TEXT */ 322 ACCW(PR_CTX0_8, 0x0208004a); /* NVclass $04a, patchcfg ROP_AND, nv10+: little endian */ 323 ACCW(PR_CTX1_8, 0x02000000); /* colorspace not set, notify instance is $0200 (b16-31) */ 324 ACCW(PR_CTX2_8, 0x00000000); /* DMA0 and DMA1 instance invalid */ 325 ACCW(PR_CTX3_8, 0x00000000); /* method traps disabled */ 326 ACCW(PR_CTX0_9, 0x00000000); /* extra */ 327 ACCW(PR_CTX1_9, 0x00000000); /* extra */ 328 /* setup set '6' for cmd NV10_CONTEXT_SURFACES_2D */ 329 ACCW(PR_CTX0_A, 0x02080062); /* NVclass $062, nv10+: little endian */ 330 ACCW(PR_CTX1_A, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 331 ACCW(PR_CTX2_A, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */ 332 ACCW(PR_CTX3_A, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */ 333 ACCW(PR_CTX0_B, 0x00000000); /* extra */ 334 ACCW(PR_CTX1_B, 0x00000000); /* extra */ 335 /* setup set '7' for cmd NV_SCALED_IMAGE_FROM_MEMORY */ 336 ACCW(PR_CTX0_C, 0x02080077); /* NVclass $077, nv10+: little endian */ 337 ACCW(PR_CTX1_C, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 338 ACCW(PR_CTX2_C, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */ 339 ACCW(PR_CTX3_C, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */ 340 ACCW(PR_CTX0_D, 0x00000000); /* extra */ 341 ACCW(PR_CTX1_D, 0x00000000); /* extra */ 342 /* setup DMA set pointed at by PF_CACH1_DMAI */ 343 ACCW(PR_CTX0_E, 0x00003002); /* DMA page table present and of linear type; 344 * DMA class is $002 (b0-11); 345 * DMA target node is NVM (non-volatile memory?) 346 * (instead of doing PCI or AGP transfers) */ 347 ACCW(PR_CTX1_E, 0x00007fff); /* DMA limit: tablesize is 32k bytes */ 348 ACCW(PR_CTX2_E, (((si->ps.memory_size - 1) & 0xffff8000) | 0x00000002)); 349 /* DMA access type is READ_AND_WRITE; 350 * table is located at end of cardRAM (b12-31): 351 * It's adress needs to be at a 4kb boundary! */ 352 } 353 else 354 { 355 /* setup a DMA define for use by command defines below. */ 356 ACCW(PR_CTX0_R, 0x00003000); /* DMA page table present and of linear type; 357 * DMA target node is NVM (non-volatile memory?) 358 * (instead of doing PCI or AGP transfers) */ 359 ACCW(PR_CTX1_R, (si->ps.memory_size - 1)); /* DMA limit: size is all cardRAM */ 360 ACCW(PR_CTX2_R, ((0x00000000 & 0xfffff000) | 0x00000002)); 361 /* DMA access type is READ_AND_WRITE; 362 * memory starts at start of cardRAM (b12-31): 363 * It's adress needs to be at a 4kb boundary! */ 364 ACCW(PR_CTX3_R, 0x00000002); /* unknown (looks like this is rubbish/not needed?) */ 365 /* setup set '0' for cmd NV_ROP5_SOLID */ 366 ACCW(PR_CTX0_0, 0x01008043); /* NVclass $043, patchcfg ROP_AND, nv10+: little endian */ 367 ACCW(PR_CTX1_0, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 368 ACCW(PR_CTX2_0, 0x00000000); /* DMA0 and DMA1 instance invalid */ 369 ACCW(PR_CTX3_0, 0x00000000); /* method traps disabled */ 370 /* setup set '1' for cmd NV_IMAGE_BLACK_RECTANGLE */ 371 ACCW(PR_CTX0_1, 0x01008019); /* NVclass $019, patchcfg ROP_AND, nv10+: little endian */ 372 ACCW(PR_CTX1_1, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 373 ACCW(PR_CTX2_1, 0x00000000); /* DMA0 and DMA1 instance invalid */ 374 ACCW(PR_CTX3_1, 0x00000000); /* method traps disabled */ 375 /* setup set '2' for cmd NV_IMAGE_PATTERN */ 376 ACCW(PR_CTX0_2, 0x01008018); /* NVclass $018, patchcfg ROP_AND, nv10+: little endian */ 377 ACCW(PR_CTX1_2, 0x00000002); /* colorspace not set, notify instance is $0200 (b16-31) */ 378 ACCW(PR_CTX2_2, 0x00000000); /* DMA0 and DMA1 instance invalid */ 379 ACCW(PR_CTX3_2, 0x00000000); /* method traps disabled */ 380 /* setup set '3' for ... */ 381 if(si->ps.card_arch >= NV10A) 382 { 383 /* ... cmd NV10_CONTEXT_SURFACES_2D */ 384 ACCW(PR_CTX0_3, 0x01008062); /* NVclass $062, nv10+: little endian */ 385 } 386 else 387 { 388 /* ... cmd NV4_SURFACE */ 389 ACCW(PR_CTX0_3, 0x01008042); /* NVclass $042, nv10+: little endian */ 390 } 391 ACCW(PR_CTX1_3, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 392 ACCW(PR_CTX2_3, 0x11401140); /* DMA0 instance is $1140, DMA1 instance invalid */ 393 ACCW(PR_CTX3_3, 0x00000000); /* method trap 0 is $1140, trap 1 disabled */ 394 /* setup set '4' for ... */ 395 if (si->ps.card_type >= NV11) 396 { 397 /* ... cmd NV12_IMAGE_BLIT */ 398 ACCW(PR_CTX0_4, 0x0100809f); /* NVclass $09f, patchcfg ROP_AND, nv10+: little endian */ 399 } 400 else 401 { 402 /* ... cmd NV_IMAGE_BLIT */ 403 ACCW(PR_CTX0_4, 0x0100805f); /* NVclass $05f, patchcfg ROP_AND, nv10+: little endian */ 404 } 405 ACCW(PR_CTX1_4, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 406 ACCW(PR_CTX2_4, 0x11401140); /* DMA0 instance is $1140, DMA1 instance invalid */ 407 ACCW(PR_CTX3_4, 0x00000000); /* method trap 0 is $1140, trap 1 disabled */ 408 /* setup set '5' for cmd NV4_GDI_RECTANGLE_TEXT */ 409 ACCW(PR_CTX0_5, 0x0100804a); /* NVclass $04a, patchcfg ROP_AND, nv10+: little endian */ 410 ACCW(PR_CTX1_5, 0x00000002); /* colorspace not set, notify instance is $0200 (b16-31) */ 411 ACCW(PR_CTX2_5, 0x00000000); /* DMA0 and DMA1 instance invalid */ 412 ACCW(PR_CTX3_5, 0x00000000); /* method traps disabled */ 413 /* setup set '6' ... */ 414 if (si->ps.card_arch >= NV10A) 415 { 416 /* ... for cmd NV10_CONTEXT_SURFACES_ARGB_ZS */ 417 ACCW(PR_CTX0_6, 0x00000093); /* NVclass $093, nv10+: little endian */ 418 } 419 else 420 { 421 /* ... for cmd NV4_CONTEXT_SURFACES_ARGB_ZS */ 422 ACCW(PR_CTX0_6, 0x00000053); /* NVclass $053, nv10+: little endian */ 423 } 424 ACCW(PR_CTX1_6, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 425 ACCW(PR_CTX2_6, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 426 ACCW(PR_CTX3_6, 0x00000000); /* method traps disabled */ 427 /* setup set '7' ... */ 428 if (si->ps.card_arch >= NV10A) 429 { 430 /* ... for cmd NV10_DX5_TEXTURE_TRIANGLE */ 431 ACCW(PR_CTX0_7, 0x0300a094); /* NVclass $094, patchcfg ROP_AND, userclip enable, 432 * context surface0 valid, nv10+: little endian */ 433 } 434 else 435 { 436 /* ... for cmd NV4_DX5_TEXTURE_TRIANGLE */ 437 ACCW(PR_CTX0_7, 0x0300a054); /* NVclass $054, patchcfg ROP_AND, userclip enable, 438 * context surface0 valid */ 439 } 440 ACCW(PR_CTX1_7, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 441 ACCW(PR_CTX2_7, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 442 ACCW(PR_CTX3_7, 0x00000000); /* method traps disabled */ 443 /* setup set '8' ... */ 444 if (si->ps.card_arch >= NV10A) 445 { 446 /* ... for cmd NV10_DX6_MULTI_TEXTURE_TRIANGLE (not used) */ 447 ACCW(PR_CTX0_8, 0x0300a095); /* NVclass $095, patchcfg ROP_AND, userclip enable, 448 * context surface0 valid, nv10+: little endian */ 449 } 450 else 451 { 452 /* ... for cmd NV4_DX6_MULTI_TEXTURE_TRIANGLE (not used) */ 453 ACCW(PR_CTX0_8, 0x0300a055); /* NVclass $055, patchcfg ROP_AND, userclip enable, 454 * context surface0 valid */ 455 } 456 ACCW(PR_CTX1_8, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 457 ACCW(PR_CTX2_8, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 458 ACCW(PR_CTX3_8, 0x00000000); /* method traps disabled */ 459 /* setup set '9' for cmd NV_SCALED_IMAGE_FROM_MEMORY */ 460 ACCW(PR_CTX0_9, 0x01018077); /* NVclass $077, patchcfg SRC_COPY, 461 * context surface0 valid, nv10+: little endian */ 462 ACCW(PR_CTX1_9, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 463 ACCW(PR_CTX2_9, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 464 ACCW(PR_CTX3_9, 0x00000000); /* method traps disabled */ 465 /* setup set 'A' for cmd NV1_RENDER_SOLID_LIN (not used) */ 466 ACCW(PR_CTX0_A, 0x0300a01c); /* NVclass $01c, patchcfg ROP_AND, userclip enable, 467 * context surface0 valid, nv10+: little endian */ 468 ACCW(PR_CTX1_A, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 469 ACCW(PR_CTX2_A, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 470 ACCW(PR_CTX3_A, 0x00000000); /* method traps disabled */ 471 /* setup DMA set pointed at by PF_CACH1_DMAI */ 472 if (si->engine.agp_mode) 473 { 474 /* DMA page table present and of linear type; 475 * DMA class is $002 (b0-11); 476 * DMA target node is AGP */ 477 ACCW(PR_CTX0_B, 0x00033002); 478 } 479 else 480 { 481 /* DMA page table present and of linear type; 482 * DMA class is $002 (b0-11); 483 * DMA target node is PCI */ 484 ACCW(PR_CTX0_B, 0x00023002); 485 } 486 ACCW(PR_CTX1_B, 0x000fffff); /* DMA limit: tablesize is 1M bytes */ 487 ACCW(PR_CTX2_B, (((uint32)((uint8 *)(si->dma_buffer_pci))) | 0x00000002)); 488 /* DMA access type is READ_AND_WRITE; 489 * table is located in main system RAM (b12-31): 490 * It's adress needs to be at a 4kb boundary! */ 491 492 /* set the 3D rendering functions colordepth via BPIXEL's 'depth 2' */ 493 /* note: 494 * setting a depth to 'invalid' (zero) makes the engine report 495 * ready with drawing 'immediately'. */ 496 //fixme: NV30A and above (probably) needs to be corrected... 497 switch(si->dm.space) 498 { 499 case B_CMAP8: 500 if (si->ps.card_arch < NV30A) 501 /* set depth 2: $1 = Y8 */ 502 ACCW(BPIXEL, 0x00000100); 503 else 504 /* set depth 0-1: $1 = Y8, $2 = X1R5G5B5_Z1R5G5B5 */ 505 ACCW(BPIXEL, 0x00000021); 506 break; 507 case B_RGB15_LITTLE: 508 if (si->ps.card_arch < NV30A) 509 /* set depth 2: $4 = A1R5G5B5 */ 510 ACCW(BPIXEL, 0x00000400); 511 else 512 /* set depth 0-1: $2 = X1R5G5B5_Z1R5G5B5, $4 = A1R5G5B5 */ 513 ACCW(BPIXEL, 0x00000042); 514 break; 515 case B_RGB16_LITTLE: 516 if (si->ps.card_arch < NV30A) 517 /* set depth 2: $5 = R5G6B5 */ 518 ACCW(BPIXEL, 0x00000500); 519 else 520 /* set depth 0-1: $5 = R5G6B5, $a = X1A7R8G8B8_O1A7R8G8B8 */ 521 ACCW(BPIXEL, 0x000000a5); 522 break; 523 case B_RGB32_LITTLE: 524 case B_RGBA32_LITTLE: 525 if (si->ps.card_arch < NV30A) 526 /* set depth 2: $c = A8R8G8B8 */ 527 ACCW(BPIXEL, 0x00000c00); 528 else 529 /* set depth 0-1: $7 = X8R8G8B8_Z8R8G8B8, $e = V8YB8U8YA8 */ 530 ACCW(BPIXEL, 0x000000e7); 531 break; 532 default: 533 LOG(8,("ACC: init, invalid bit depth\n")); 534 return B_ERROR; 535 } 536 } 537 538 if (si->ps.card_arch == NV04A) 539 { 540 /* do a explicit engine reset */ 541 ACCW(DEBUG0, 0x000001ff); 542 543 /* init some function blocks */ 544 /* DEBUG0, b20 and b21 should be high, this has a big influence on 545 * 3D rendering speed! (on all cards, confirmed) */ 546 ACCW(DEBUG0, 0x1230c000); 547 /* DEBUG1, b19 = 1 increases 3D rendering speed on TNT2 (M64) a bit, 548 * TNT1 rendering speed stays the same (all cards confirmed) */ 549 ACCW(DEBUG1, 0x72191101); 550 ACCW(DEBUG2, 0x11d5f071); 551 ACCW(DEBUG3, 0x0004ff31); 552 /* init OP methods */ 553 ACCW(DEBUG3, 0x4004ff31); 554 555 /* disable all acceleration engine INT reguests */ 556 ACCW(ACC_INTE, 0x00000000); 557 /* reset all acceration engine INT status bits */ 558 ACCW(ACC_INTS, 0xffffffff); 559 /* context control enabled */ 560 ACCW(NV04_CTX_CTRL, 0x10010100); 561 /* all acceleration buffers, pitches and colors are valid */ 562 ACCW(NV04_ACC_STAT, 0xffffffff); 563 /* enable acceleration engine command FIFO */ 564 ACCW(FIFO_EN, 0x00000001); 565 566 /* setup location of active screen in framebuffer */ 567 ACCW(OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 568 ACCW(OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 569 /* setup accesible card memory range */ 570 ACCW(BLIMIT0, (si->ps.memory_size - 1)); 571 ACCW(BLIMIT1, (si->ps.memory_size - 1)); 572 573 /* pattern shape value = 8x8, 2 color */ 574 //fixme: not needed, unless the engine has a hardware fault (setting via cmd)! 575 //ACCW(PAT_SHP, 0x00000000); 576 /* Pgraph Beta AND value (fraction) b23-30 */ 577 ACCW(BETA_AND_VAL, 0xffffffff); 578 } 579 else 580 { 581 /* do a explicit engine reset */ 582 ACCW(DEBUG0, 0xffffffff); 583 ACCW(DEBUG0, 0x00000000); 584 /* disable all acceleration engine INT reguests */ 585 ACCW(ACC_INTE, 0x00000000); 586 /* reset all acceration engine INT status bits */ 587 ACCW(ACC_INTS, 0xffffffff); 588 /* context control enabled */ 589 ACCW(NV10_CTX_CTRL, 0x10010100); 590 /* all acceleration buffers, pitches and colors are valid */ 591 ACCW(NV10_ACC_STAT, 0xffffffff); 592 /* enable acceleration engine command FIFO */ 593 ACCW(FIFO_EN, 0x00000001); 594 /* setup surface type: 595 * b1-0 = %01 = surface type is non-swizzle; 596 * this is needed to enable 3D on NV1x (confirmed) and maybe others? */ 597 ACCW(NV10_SURF_TYP, ((ACCR(NV10_SURF_TYP)) & 0x0007ff00)); 598 ACCW(NV10_SURF_TYP, ((ACCR(NV10_SURF_TYP)) | 0x00020101)); 599 } 600 601 if (si->ps.card_arch == NV10A) 602 { 603 /* init some function blocks */ 604 ACCW(DEBUG1, 0x00118700); 605 /* DEBUG2 has a big influence on 3D speed for NV11 and NV15 606 * (confirmed b3 and b18 should both be '1' on both cards!) 607 * (b16 should also be '1', increases 3D speed on NV11 a bit more) */ 608 ACCW(DEBUG2, 0x24fd2ad9); 609 ACCW(DEBUG3, 0x55de0030); 610 /* NV10_DEBUG4 has a big influence on 3D speed for NV11, NV15 and NV18 611 * (confirmed b14 and b15 should both be '1' on these cards!) 612 * (confirmed b8 should be '0' on NV18 to prevent complete engine crash!) */ 613 ACCW(NV10_DEBUG4, 0x0000c000); 614 615 /* copy tile setup stuff from 'source' to acc engine */ 616 for (cnt = 0; cnt < 32; cnt++) 617 { 618 NV_REG32(NVACC_NV10_TIL0AD + (cnt << 2)) = 619 NV_REG32(NVACC_NV10_FBTIL0AD + (cnt << 2)); 620 } 621 622 /* setup location of active screen in framebuffer */ 623 ACCW(OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 624 ACCW(OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 625 /* setup accesible card memory range */ 626 ACCW(BLIMIT0, (si->ps.memory_size - 1)); 627 ACCW(BLIMIT1, (si->ps.memory_size - 1)); 628 629 /* pattern shape value = 8x8, 2 color */ 630 //fixme: not needed, unless the engine has a hardware fault (setting via cmd)! 631 //ACCW(PAT_SHP, 0x00000000); 632 /* Pgraph Beta AND value (fraction) b23-30 */ 633 ACCW(BETA_AND_VAL, 0xffffffff); 634 } 635 636 if (si->ps.card_arch >= NV20A) 637 { 638 switch (si->ps.card_arch) 639 { 640 case NV40A: 641 /* init some function blocks */ 642 ACCW(DEBUG1, 0x401287c0); 643 ACCW(DEBUG3, 0x60de8051); 644 /* disable specific functions, but enable SETUP_SPARE2 register */ 645 ACCW(NV10_DEBUG4, 0x00008000); 646 /* set limit_viol_pix_adress(?): more likely something unknown.. */ 647 ACCW(NV25_WHAT0, 0x00be3c5f); 648 649 /* setup some unknown serially accessed registers (?) */ 650 tmp = (NV_REG32(NV32_NV4X_WHAT0) & 0x000000ff); 651 for (cnt = 0; (tmp && !(tmp & 0x00000001)); tmp >>= 1, cnt++); 652 { 653 ACCW(NV4X_WHAT2, cnt); 654 } 655 656 /* unknown.. */ 657 switch (si->ps.card_type) 658 { 659 case NV40: 660 case NV45: 661 /* and NV48: but these are pgm'd as NV45 currently */ 662 ACCW(NV40_WHAT0, 0x83280fff); 663 ACCW(NV40_WHAT1, 0x000000a0); 664 ACCW(NV40_WHAT2, 0x0078e366); 665 ACCW(NV40_WHAT3, 0x0000014c); 666 break; 667 case NV41: 668 /* and ID == 0x012x: but no cards defined yet */ 669 ACCW(NV40P_WHAT0, 0x83280eff); 670 ACCW(NV40P_WHAT1, 0x000000a0); 671 ACCW(NV40P_WHAT2, 0x007596ff); 672 ACCW(NV40P_WHAT3, 0x00000108); 673 break; 674 case NV43: 675 ACCW(NV40P_WHAT0, 0x83280eff); 676 ACCW(NV40P_WHAT1, 0x000000a0); 677 ACCW(NV40P_WHAT2, 0x0072cb77); 678 ACCW(NV40P_WHAT3, 0x00000108); 679 break; 680 case NV44: 681 case G72: 682 ACCW(NV40P_WHAT0, 0x83280eff); 683 ACCW(NV40P_WHAT1, 0x000000a0); 684 685 NV_REG32(NV32_NV44_WHAT10) = NV_REG32(NV32_NV10STRAPINFO); 686 NV_REG32(NV32_NV44_WHAT11) = 0x00000000; 687 NV_REG32(NV32_NV44_WHAT12) = 0x00000000; 688 NV_REG32(NV32_NV44_WHAT13) = NV_REG32(NV32_NV10STRAPINFO); 689 690 ACCW(NV44_WHAT2, 0x00000000); 691 ACCW(NV44_WHAT3, 0x00000000); 692 break; 693 /* case NV44 type 2: (cardID 0x022x) 694 //fixme if needed: doesn't seem to need the strapinfo thing.. 695 ACCW(NV40P_WHAT0, 0x83280eff); 696 ACCW(NV40P_WHAT1, 0x000000a0); 697 698 ACCW(NV44_WHAT2, 0x00000000); 699 ACCW(NV44_WHAT3, 0x00000000); 700 break; 701 */ case G70: 702 case G71: 703 case G73: 704 ACCW(NV40P_WHAT0, 0x83280eff); 705 ACCW(NV40P_WHAT1, 0x000000a0); 706 ACCW(NV40P_WHAT2, 0x07830610); 707 ACCW(NV40P_WHAT3, 0x0000016a); 708 break; 709 default: 710 ACCW(NV40P_WHAT0, 0x83280eff); 711 ACCW(NV40P_WHAT1, 0x000000a0); 712 break; 713 } 714 715 ACCW(NV10_TIL3PT, 0x2ffff800); 716 ACCW(NV10_TIL3ST, 0x00006000); 717 ACCW(NV4X_WHAT1, 0x01000000); 718 /* engine data source DMA instance = $1140 */ 719 ACCW(NV4X_DMA_SRC, 0x00001140); 720 break; 721 case NV30A: 722 /* init some function blocks, but most is unknown.. */ 723 ACCW(DEBUG1, 0x40108700); 724 ACCW(NV25_WHAT1, 0x00140000); 725 ACCW(DEBUG3, 0xf00e0431); 726 ACCW(NV10_DEBUG4, 0x00008000); 727 ACCW(NV25_WHAT0, 0xf04b1f36); 728 ACCW(NV20_WHAT3, 0x1002d888); 729 ACCW(NV25_WHAT2, 0x62ff007f); 730 break; 731 case NV20A: 732 /* init some function blocks, but most is unknown.. */ 733 ACCW(DEBUG1, 0x00118700); 734 ACCW(DEBUG3, 0xf20e0431); 735 ACCW(NV10_DEBUG4, 0x00000000); 736 ACCW(NV20_WHAT1, 0x00000040); 737 if (si->ps.card_type < NV25) 738 { 739 ACCW(NV20_WHAT2, 0x00080000); 740 ACCW(NV10_DEBUG5, 0x00000005); 741 ACCW(NV20_WHAT3, 0x45caa208); 742 ACCW(NV20_WHAT4, 0x24000000); 743 ACCW(NV20_WHAT5, 0x00000040); 744 745 /* copy some fixed RAM(?) configuration info(?) to some indexed registers: */ 746 /* b16-24 is select; b2-13 is adress in 32-bit words */ 747 ACCW(RDI_INDEX, 0x00e00038); 748 /* data is 32-bit */ 749 ACCW(RDI_DATA, 0x00000030); 750 /* copy some fixed RAM(?) configuration info(?) to some indexed registers: */ 751 /* b16-24 is select; b2-13 is adress in 32-bit words */ 752 ACCW(RDI_INDEX, 0x00e10038); 753 /* data is 32-bit */ 754 ACCW(RDI_DATA, 0x00000030); 755 } 756 else 757 { 758 ACCW(NV25_WHAT1, 0x00080000); 759 ACCW(NV25_WHAT0, 0x304b1fb6); 760 ACCW(NV20_WHAT3, 0x18b82880); 761 ACCW(NV20_WHAT4, 0x44000000); 762 ACCW(NV20_WHAT5, 0x40000080); 763 ACCW(NV25_WHAT2, 0x000000ff); 764 } 765 break; 766 } 767 768 /* NV20A, NV30A and NV40A: */ 769 /* copy tile setup stuff from previous setup 'source' to acc engine 770 * (pattern colorRAM?) */ 771 if ((si->ps.card_type <= NV40) || (si->ps.card_type == NV45)) 772 { 773 for (cnt = 0; cnt < 32; cnt++) 774 { 775 /* copy NV10_FBTIL0AD upto/including NV10_FBTIL7ST */ 776 NV_REG32(NVACC_NV20_WHAT0 + (cnt << 2)) = 777 NV_REG32(NVACC_NV10_FBTIL0AD + (cnt << 2)); 778 779 /* copy NV10_FBTIL0AD upto/including NV10_FBTIL7ST */ 780 NV_REG32(NVACC_NV20_2_WHAT0 + (cnt << 2)) = 781 NV_REG32(NVACC_NV10_FBTIL0AD + (cnt << 2)); 782 } 783 } 784 else 785 { 786 /* NV41, 43, 44, G70 and later */ 787 if (si->ps.card_type >= G70) 788 { 789 for (cnt = 0; cnt < 60; cnt++) 790 { 791 /* copy NV41_FBTIL0AD upto/including G70_FBTILEST */ 792 NV_REG32(NVACC_NV41_WHAT0 + (cnt << 2)) = 793 NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2)); 794 795 /* copy NV41_FBTIL0AD upto/including G70_FBTILEST */ 796 NV_REG32(NVACC_NV20_2_WHAT0 + (cnt << 2)) = 797 NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2)); 798 } 799 } 800 else 801 { 802 /* NV41, 43, 44 */ 803 for (cnt = 0; cnt < 48; cnt++) 804 { 805 /* copy NV41_FBTIL0AD upto/including NV41_FBTILBST */ 806 NV_REG32(NVACC_NV20_WHAT0 + (cnt << 2)) = 807 NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2)); 808 809 if (si->ps.card_type != NV44) 810 { 811 /* copy NV41_FBTIL0AD upto/including NV41_FBTILBST */ 812 NV_REG32(NVACC_NV20_2_WHAT0 + (cnt << 2)) = 813 NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2)); 814 } 815 } 816 } 817 } 818 819 if (si->ps.card_arch >= NV40A) 820 { 821 if ((si->ps.card_type == NV40) || (si->ps.card_type == NV45)) 822 { 823 /* copy some RAM configuration info(?) */ 824 ACCW(NV20_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); 825 ACCW(NV20_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); 826 ACCW(NV40_WHAT_T2, NV_REG32(NV32_PFB_CONFIG_0)); 827 ACCW(NV40_WHAT_T3, NV_REG32(NV32_PFB_CONFIG_1)); 828 829 /* setup location of active screen in framebuffer */ 830 ACCW(NV20_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 831 ACCW(NV20_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 832 /* setup accesible card memory range */ 833 ACCW(NV20_BLIMIT6, (si->ps.memory_size - 1)); 834 ACCW(NV20_BLIMIT7, (si->ps.memory_size - 1)); 835 } 836 else 837 { 838 /* NV41, 43, 44, G70 and later */ 839 840 /* copy some RAM configuration info(?) */ 841 if (si->ps.card_type >= G70) 842 { 843 ACCW(G70_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); 844 ACCW(G70_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); 845 } 846 else 847 { 848 /* NV41, 43, 44 */ 849 ACCW(NV40P_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); 850 ACCW(NV40P_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); 851 } 852 ACCW(NV40P_WHAT_T2, NV_REG32(NV32_PFB_CONFIG_0)); 853 ACCW(NV40P_WHAT_T3, NV_REG32(NV32_PFB_CONFIG_1)); 854 855 /* setup location of active screen in framebuffer */ 856 ACCW(NV40P_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 857 ACCW(NV40P_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 858 /* setup accesible card memory range */ 859 ACCW(NV40P_BLIMIT6, (si->ps.memory_size - 1)); 860 ACCW(NV40P_BLIMIT7, (si->ps.memory_size - 1)); 861 } 862 } 863 else /* NV20A and NV30A: */ 864 { 865 /* copy some RAM configuration info(?) */ 866 ACCW(NV20_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); 867 ACCW(NV20_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); 868 /* copy some RAM configuration info(?) to some indexed registers: */ 869 /* b16-24 is select; b2-13 is adress in 32-bit words */ 870 ACCW(RDI_INDEX, 0x00ea0000); 871 /* data is 32-bit */ 872 ACCW(RDI_DATA, NV_REG32(NV32_PFB_CONFIG_0)); 873 /* b16-24 is select; b2-13 is adress in 32-bit words */ 874 ACCW(RDI_INDEX, 0x00ea0004); 875 /* data is 32-bit */ 876 ACCW(RDI_DATA, NV_REG32(NV32_PFB_CONFIG_1)); 877 878 /* setup location of active screen in framebuffer */ 879 ACCW(NV20_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 880 ACCW(NV20_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 881 /* setup accesible card memory range */ 882 ACCW(NV20_BLIMIT6, (si->ps.memory_size - 1)); 883 ACCW(NV20_BLIMIT7, (si->ps.memory_size - 1)); 884 } 885 886 /* NV20A, NV30A and NV40A: */ 887 /* setup some acc engine tile stuff */ 888 ACCW(NV10_TIL2AD, 0x00000000); 889 ACCW(NV10_TIL0ED, 0xffffffff); 890 } 891 892 /* all cards: */ 893 /* setup clipping: rect size is 32768 x 32768, probably max. setting */ 894 /* note: 895 * can also be done via the NV_IMAGE_BLACK_RECTANGLE engine command. */ 896 ACCW(ABS_UCLP_XMIN, 0x00000000); 897 ACCW(ABS_UCLP_YMIN, 0x00000000); 898 ACCW(ABS_UCLP_XMAX, 0x00007fff); 899 ACCW(ABS_UCLP_YMAX, 0x00007fff); 900 901 /* setup sync parameters for NV12_IMAGE_BLIT command for the current mode: 902 * values given are CRTC vertical counter limit values. The NV12 command will wait 903 * for the specified's CRTC's vertical counter to be in between the given values */ 904 if (si->ps.card_type >= NV11) 905 { 906 ACCW(NV11_CRTC_LO, si->dm.timing.v_display - 1); 907 ACCW(NV11_CRTC_HI, si->dm.timing.v_display + 1); 908 } 909 910 /*** PFIFO ***/ 911 /* (setup caches) */ 912 /* disable caches reassign */ 913 ACCW(PF_CACHES, 0x00000000); 914 /* PFIFO mode: channel 0 is in DMA mode, channels 1 - 32 are in PIO mode */ 915 ACCW(PF_MODE, 0x00000001); 916 /* cache1 push0 access disabled */ 917 ACCW(PF_CACH1_PSH0, 0x00000000); 918 /* cache1 pull0 access disabled */ 919 ACCW(PF_CACH1_PUL0, 0x00000000); 920 /* cache1 push1 mode = DMA */ 921 if (si->ps.card_arch >= NV40A) 922 ACCW(PF_CACH1_PSH1, 0x00010000); 923 else 924 ACCW(PF_CACH1_PSH1, 0x00000100); 925 /* cache1 DMA Put offset = 0 (b2-28) */ 926 ACCW(PF_CACH1_DMAP, 0x00000000); 927 /* cache1 DMA Get offset = 0 (b2-28) */ 928 ACCW(PF_CACH1_DMAG, 0x00000000); 929 /* cache1 DMA instance adress = $114e (b0-15); 930 * instance being b4-19 with baseadress NV_PRAMIN_CTX_0 (0x00700000). */ 931 /* note: 932 * should point to a DMA definition in CTX register space (which is sort of RAM). 933 * This define tells the engine where the DMA cmd buffer is and what it's size is. 934 * Inside that cmd buffer you'll find the actual issued engine commands. */ 935 if (si->ps.card_arch >= NV40A) 936 ACCW(PF_CACH1_DMAI, 0x00001150); 937 else 938 ACCW(PF_CACH1_DMAI, 0x0000114d); 939 /* cache0 push0 access disabled */ 940 ACCW(PF_CACH0_PSH0, 0x00000000); 941 /* cache0 pull0 access disabled */ 942 ACCW(PF_CACH0_PUL0, 0x00000000); 943 /* RAM HT (hash table) baseadress = $10000 (b4-8), size = 4k, 944 * search = 128 (is byte offset between hash 'sets') */ 945 /* note: 946 * so HT base is $00710000, last is $00710fff. 947 * In this space you define the engine command handles (HT_HANDL_XX), which 948 * in turn points to the defines in CTX register space (which is sort of RAM) */ 949 ACCW(PF_RAMHT, 0x03000100); 950 /* RAM FC baseadress = $11000 (b3-8) (size is fixed to 0.5k(?)) */ 951 /* note: 952 * so FC base is $00711000, last is $007111ff. (not used?) */ 953 ACCW(PF_RAMFC, 0x00000110); 954 /* RAM RO baseadress = $11200 (b1-8), size = 0.5k */ 955 /* note: 956 * so RO base is $00711200, last is $007113ff. (not used?) */ 957 /* note also: 958 * This means(?) the PRAMIN CTX registers are accessible from base $00711400. */ 959 ACCW(PF_RAMRO, 0x00000112); 960 /* PFIFO size: ch0-15 = 512 bytes, ch16-31 = 124 bytes */ 961 ACCW(PF_SIZE, 0x0000ffff); 962 /* cache1 hash instance = $ffff (b0-15) */ 963 ACCW(PF_CACH1_HASH, 0x0000ffff); 964 /* disable all PFIFO INTs */ 965 ACCW(PF_INTEN, 0x00000000); 966 /* reset all PFIFO INT status bits */ 967 ACCW(PF_INTSTAT, 0xffffffff); 968 /* cache0 pull0 engine = acceleration engine (graphics) */ 969 ACCW(PF_CACH0_PUL1, 0x00000001); 970 /* cache1 DMA control: disable some stuff */ 971 ACCW(PF_CACH1_DMAC, 0x00000000); 972 /* cache1 engine 0 upto/including 7 is software (could also be graphics or DVD) */ 973 ACCW(PF_CACH1_ENG, 0x00000000); 974 /* cache1 DMA fetch: trigger at 128 bytes, size is 32 bytes, max requests is 15, 975 * use little endian */ 976 ACCW(PF_CACH1_DMAF, 0x000f0078); 977 /* cache1 DMA push: b0 = 1: access is enabled */ 978 ACCW(PF_CACH1_DMAS, 0x00000001); 979 /* cache1 push0 access enabled */ 980 ACCW(PF_CACH1_PSH0, 0x00000001); 981 /* cache1 pull0 access enabled */ 982 ACCW(PF_CACH1_PUL0, 0x00000001); 983 /* cache1 pull1 engine = acceleration engine (graphics) */ 984 ACCW(PF_CACH1_PUL1, 0x00000001); 985 /* enable PFIFO caches reassign */ 986 ACCW(PF_CACHES, 0x00000001); 987 988 /* setup 3D specifics */ 989 nv_init_for_3D_dma(); 990 991 /*** init acceleration engine command info ***/ 992 /* set object handles */ 993 /* note: 994 * probably depending on some other setup, there are 8 or 32 FIFO channels 995 * available. Assuming the current setup only has 8 channels because the 'rest' 996 * isn't setup here... */ 997 si->engine.fifo.handle[0] = NV_ROP5_SOLID; 998 si->engine.fifo.handle[1] = NV_IMAGE_BLACK_RECTANGLE; 999 si->engine.fifo.handle[2] = NV_IMAGE_PATTERN; 1000 si->engine.fifo.handle[3] = NV4_SURFACE; /* NV10_CONTEXT_SURFACES_2D is identical */ 1001 si->engine.fifo.handle[4] = NV_IMAGE_BLIT; 1002 si->engine.fifo.handle[5] = NV4_GDI_RECTANGLE_TEXT; 1003 si->engine.fifo.handle[6] = NV4_CONTEXT_SURFACES_ARGB_ZS;//NV1_RENDER_SOLID_LIN; 1004 si->engine.fifo.handle[7] = NV4_DX5_TEXTURE_TRIANGLE; 1005 /* preset no FIFO channels assigned to cmd's */ 1006 for (cnt = 0; cnt < 0x20; cnt++) 1007 { 1008 si->engine.fifo.ch_ptr[cnt] = 0; 1009 } 1010 /* set handle's pointers to their assigned FIFO channels */ 1011 /* note: 1012 * b0-1 aren't used as adressbits. Using b0 to indicate a valid pointer. */ 1013 for (cnt = 0; cnt < 0x08; cnt++) 1014 { 1015 si->engine.fifo.ch_ptr[(si->engine.fifo.handle[cnt])] = 1016 (0x00000001 + (cnt * 0x00002000)); 1017 } 1018 1019 /*** init DMA command buffer info ***/ 1020 if (si->ps.card_arch >= NV40A) //main mem DMA buf on pre-NV40 1021 { 1022 si->dma_buffer = (void *)((char *)si->framebuffer + 1023 ((si->ps.memory_size - 1) & 0xffff8000)); 1024 } 1025 LOG(4,("ACC_DMA: command buffer is at adress $%08x\n", 1026 ((uint32)(si->dma_buffer)))); 1027 /* we have issued no DMA cmd's to the engine yet */ 1028 si->engine.dma.put = 0; 1029 /* the current first free adress in the DMA buffer is at offset 0 */ 1030 si->engine.dma.current = 0; 1031 /* the DMA buffer can hold 8k 32-bit words (it's 32kb in size), 1032 * or 256k 32-bit words (1Mb in size) dependant on architecture (for now) */ 1033 /* note: 1034 * one word is reserved at the end of the DMA buffer to be able to instruct the 1035 * engine to do a buffer wrap-around! 1036 * (DMA opcode 'noninc method': issue word $20000000.) */ 1037 if (si->ps.card_arch < NV40A) 1038 si->engine.dma.max = ((1 * 1024 * 1024) >> 2) - 1; 1039 else 1040 si->engine.dma.max = 8192 - 1; 1041 /* note the current free space we have left in the DMA buffer */ 1042 si->engine.dma.free = si->engine.dma.max - si->engine.dma.current; 1043 1044 /*** init FIFO via DMA command buffer. ***/ 1045 /* wait for room in fifo for new FIFO assigment cmds if needed: */ 1046 if (si->ps.card_arch >= NV40A) 1047 { 1048 if (nv_acc_fifofree_dma(12) != B_OK) return B_ERROR; 1049 } 1050 else 1051 { 1052 if (nv_acc_fifofree_dma(16) != B_OK) return B_ERROR; 1053 } 1054 1055 /* program new FIFO assignments */ 1056 /* Raster OPeration: */ 1057 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH0, si->engine.fifo.handle[0]); 1058 /* Clip: */ 1059 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH1, si->engine.fifo.handle[1]); 1060 /* Pattern: */ 1061 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH2, si->engine.fifo.handle[2]); 1062 /* 2D Surfaces: */ 1063 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH3, si->engine.fifo.handle[3]); 1064 /* Blit: */ 1065 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH4, si->engine.fifo.handle[4]); 1066 /* Bitmap: */ 1067 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH5, si->engine.fifo.handle[5]); 1068 if (si->ps.card_arch < NV40A) 1069 { 1070 /* 3D surfaces: (3D related only) */ 1071 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH6, si->engine.fifo.handle[6]); 1072 /* Textured Triangle: (3D only) */ 1073 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH7, si->engine.fifo.handle[7]); 1074 } 1075 1076 /*** Set pixel width ***/ 1077 switch(si->dm.space) 1078 { 1079 case B_CMAP8: 1080 surf_depth = 0x00000001; 1081 cmd_depth = 0x00000003; 1082 break; 1083 case B_RGB15_LITTLE: 1084 case B_RGB16_LITTLE: 1085 surf_depth = 0x00000004; 1086 cmd_depth = 0x00000001; 1087 break; 1088 case B_RGB32_LITTLE: 1089 case B_RGBA32_LITTLE: 1090 surf_depth = 0x00000006; 1091 cmd_depth = 0x00000003; 1092 break; 1093 default: 1094 LOG(8,("ACC_DMA: init, invalid bit depth\n")); 1095 return B_ERROR; 1096 } 1097 1098 /* wait for room in fifo for surface setup cmd if needed */ 1099 if (nv_acc_fifofree_dma(5) != B_OK) return B_ERROR; 1100 /* now setup 2D surface (writing 5 32bit words) */ 1101 nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 4); 1102 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = surf_depth; /* Format */ 1103 /* setup screen pitch */ 1104 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1105 ((si->fbc.bytes_per_row & 0x0000ffff) | (si->fbc.bytes_per_row << 16)); /* Pitch */ 1106 /* setup screen location */ 1107 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1108 ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer); /* OffsetSource */ 1109 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1110 ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer); /* OffsetDest */ 1111 1112 /* wait for room in fifo for pattern colordepth setup cmd if needed */ 1113 if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR; 1114 /* set pattern colordepth (writing 2 32bit words) */ 1115 nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETCOLORFORMAT, 1); 1116 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 1117 1118 /* wait for room in fifo for bitmap colordepth setup cmd if needed */ 1119 if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR; 1120 /* set bitmap colordepth (writing 2 32bit words) */ 1121 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_SETCOLORFORMAT, 1); 1122 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 1123 1124 /* Load our pattern into the engine: */ 1125 /* wait for room in fifo for pattern cmd if needed. */ 1126 if (nv_acc_fifofree_dma(7) != B_OK) return B_ERROR; 1127 /* now setup pattern (writing 7 32bit words) */ 1128 nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETSHAPE, 1); 1129 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000000; /* SetShape: 0 = 8x8, 1 = 64x1, 2 = 1x64 */ 1130 nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETCOLOR0, 4); 1131 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetColor0 */ 1132 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetColor1 */ 1133 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetPattern[0] */ 1134 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetPattern[1] */ 1135 1136 /* tell the engine to fetch and execute all (new) commands in the DMA buffer */ 1137 nv_start_dma(); 1138 1139 return B_OK; 1140 } 1141 1142 static void nv_init_for_3D_dma(void) 1143 { 1144 /* setup PGRAPH unknown registers and modify (pre-cleared) pipe stuff for 3D use */ 1145 if (si->ps.card_arch >= NV10A) 1146 { 1147 /* setup unknown PGRAPH stuff */ 1148 ACCW(PGWHAT_00, 0x00000000); 1149 ACCW(PGWHAT_01, 0x00000000); 1150 ACCW(PGWHAT_02, 0x00000000); 1151 ACCW(PGWHAT_03, 0x00000000); 1152 1153 ACCW(PGWHAT_04, 0x00001000); 1154 ACCW(PGWHAT_05, 0x00001000); 1155 ACCW(PGWHAT_06, 0x4003ff80); 1156 1157 ACCW(PGWHAT_07, 0x00000000); 1158 ACCW(PGWHAT_08, 0x00000000); 1159 ACCW(PGWHAT_09, 0x00000000); 1160 ACCW(PGWHAT_0A, 0x00000000); 1161 ACCW(PGWHAT_0B, 0x00000000); 1162 1163 ACCW(PGWHAT_0C, 0x00080008); 1164 ACCW(PGWHAT_0D, 0x00080008); 1165 1166 ACCW(PGWHAT_0E, 0x00000000); 1167 ACCW(PGWHAT_0F, 0x00000000); 1168 ACCW(PGWHAT_10, 0x00000000); 1169 ACCW(PGWHAT_11, 0x00000000); 1170 ACCW(PGWHAT_12, 0x00000000); 1171 ACCW(PGWHAT_13, 0x00000000); 1172 ACCW(PGWHAT_14, 0x00000000); 1173 ACCW(PGWHAT_15, 0x00000000); 1174 ACCW(PGWHAT_16, 0x00000000); 1175 ACCW(PGWHAT_17, 0x00000000); 1176 ACCW(PGWHAT_18, 0x00000000); 1177 1178 ACCW(PGWHAT_19, 0x10000000); 1179 1180 ACCW(PGWHAT_1A, 0x00000000); 1181 ACCW(PGWHAT_1B, 0x00000000); 1182 ACCW(PGWHAT_1C, 0x00000000); 1183 ACCW(PGWHAT_1D, 0x00000000); 1184 ACCW(PGWHAT_1E, 0x00000000); 1185 ACCW(PGWHAT_1F, 0x00000000); 1186 ACCW(PGWHAT_20, 0x00000000); 1187 ACCW(PGWHAT_21, 0x00000000); 1188 1189 ACCW(PGWHAT_22, 0x08000000); 1190 1191 ACCW(PGWHAT_23, 0x00000000); 1192 ACCW(PGWHAT_24, 0x00000000); 1193 ACCW(PGWHAT_25, 0x00000000); 1194 ACCW(PGWHAT_26, 0x00000000); 1195 1196 ACCW(PGWHAT_27, 0x4b7fffff); 1197 1198 ACCW(PGWHAT_28, 0x00000000); 1199 ACCW(PGWHAT_29, 0x00000000); 1200 ACCW(PGWHAT_2A, 0x00000000); 1201 1202 /* setup window clipping */ 1203 /* b0-11 = min; b16-27 = max. 1204 * note: 1205 * probably two's complement values, so setting to max range here: 1206 * which would be -2048 upto/including +2047. */ 1207 /* horizontal */ 1208 ACCW(WINCLIP_H_0, 0x07ff0800); 1209 ACCW(WINCLIP_H_1, 0x07ff0800); 1210 ACCW(WINCLIP_H_2, 0x07ff0800); 1211 ACCW(WINCLIP_H_3, 0x07ff0800); 1212 ACCW(WINCLIP_H_4, 0x07ff0800); 1213 ACCW(WINCLIP_H_5, 0x07ff0800); 1214 ACCW(WINCLIP_H_6, 0x07ff0800); 1215 ACCW(WINCLIP_H_7, 0x07ff0800); 1216 /* vertical */ 1217 ACCW(WINCLIP_V_0, 0x07ff0800); 1218 ACCW(WINCLIP_V_1, 0x07ff0800); 1219 ACCW(WINCLIP_V_2, 0x07ff0800); 1220 ACCW(WINCLIP_V_3, 0x07ff0800); 1221 ACCW(WINCLIP_V_4, 0x07ff0800); 1222 ACCW(WINCLIP_V_5, 0x07ff0800); 1223 ACCW(WINCLIP_V_6, 0x07ff0800); 1224 ACCW(WINCLIP_V_7, 0x07ff0800); 1225 1226 /* setup (initialize) pipe: 1227 * needed to get valid 3D rendering on (at least) NV1x cards. Without this 1228 * those cards produce rubbish instead of 3D, although the engine itself keeps 1229 * running and 2D stays OK. */ 1230 1231 /* set eyetype to local, lightning etc. is off */ 1232 ACCW(NV10_XFMOD0, 0x10000000); 1233 /* disable all lights */ 1234 ACCW(NV10_XFMOD1, 0x00000000); 1235 1236 /* note: upon writing data into the PIPEDAT register, the PIPEADR is 1237 * probably auto-incremented! */ 1238 /* (pipe adress = b2-16, pipe data = b0-31) */ 1239 /* note: pipe adresses IGRAPH registers! */ 1240 ACCW(NV10_PIPEADR, 0x00006740); 1241 ACCW(NV10_PIPEDAT, 0x00000000); 1242 ACCW(NV10_PIPEDAT, 0x00000000); 1243 ACCW(NV10_PIPEDAT, 0x00000000); 1244 ACCW(NV10_PIPEDAT, 0x3f800000); 1245 1246 ACCW(NV10_PIPEADR, 0x00006750); 1247 ACCW(NV10_PIPEDAT, 0x40000000); 1248 ACCW(NV10_PIPEDAT, 0x40000000); 1249 ACCW(NV10_PIPEDAT, 0x40000000); 1250 ACCW(NV10_PIPEDAT, 0x40000000); 1251 1252 ACCW(NV10_PIPEADR, 0x00006760); 1253 ACCW(NV10_PIPEDAT, 0x00000000); 1254 ACCW(NV10_PIPEDAT, 0x00000000); 1255 ACCW(NV10_PIPEDAT, 0x3f800000); 1256 ACCW(NV10_PIPEDAT, 0x00000000); 1257 1258 ACCW(NV10_PIPEADR, 0x00006770); 1259 ACCW(NV10_PIPEDAT, 0xc5000000); 1260 ACCW(NV10_PIPEDAT, 0xc5000000); 1261 ACCW(NV10_PIPEDAT, 0x00000000); 1262 ACCW(NV10_PIPEDAT, 0x00000000); 1263 1264 ACCW(NV10_PIPEADR, 0x00006780); 1265 ACCW(NV10_PIPEDAT, 0x00000000); 1266 ACCW(NV10_PIPEDAT, 0x00000000); 1267 ACCW(NV10_PIPEDAT, 0x3f800000); 1268 ACCW(NV10_PIPEDAT, 0x00000000); 1269 1270 ACCW(NV10_PIPEADR, 0x000067a0); 1271 ACCW(NV10_PIPEDAT, 0x3f800000); 1272 ACCW(NV10_PIPEDAT, 0x3f800000); 1273 ACCW(NV10_PIPEDAT, 0x3f800000); 1274 ACCW(NV10_PIPEDAT, 0x3f800000); 1275 1276 ACCW(NV10_PIPEADR, 0x00006ab0); 1277 ACCW(NV10_PIPEDAT, 0x3f800000); 1278 ACCW(NV10_PIPEDAT, 0x3f800000); 1279 ACCW(NV10_PIPEDAT, 0x3f800000); 1280 1281 ACCW(NV10_PIPEADR, 0x00006ac0); 1282 ACCW(NV10_PIPEDAT, 0x00000000); 1283 ACCW(NV10_PIPEDAT, 0x00000000); 1284 ACCW(NV10_PIPEDAT, 0x00000000); 1285 1286 ACCW(NV10_PIPEADR, 0x00006c10); 1287 ACCW(NV10_PIPEDAT, 0xbf800000); 1288 1289 ACCW(NV10_PIPEADR, 0x00007030); 1290 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1291 1292 ACCW(NV10_PIPEADR, 0x00007040); 1293 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1294 1295 ACCW(NV10_PIPEADR, 0x00007050); 1296 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1297 1298 ACCW(NV10_PIPEADR, 0x00007060); 1299 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1300 1301 ACCW(NV10_PIPEADR, 0x00007070); 1302 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1303 1304 ACCW(NV10_PIPEADR, 0x00007080); 1305 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1306 1307 ACCW(NV10_PIPEADR, 0x00007090); 1308 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1309 1310 ACCW(NV10_PIPEADR, 0x000070a0); 1311 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1312 1313 ACCW(NV10_PIPEADR, 0x00006a80); 1314 ACCW(NV10_PIPEDAT, 0x00000000); 1315 ACCW(NV10_PIPEDAT, 0x00000000); 1316 ACCW(NV10_PIPEDAT, 0x3f800000); 1317 1318 ACCW(NV10_PIPEADR, 0x00006aa0); 1319 ACCW(NV10_PIPEDAT, 0x00000000); 1320 ACCW(NV10_PIPEDAT, 0x00000000); 1321 ACCW(NV10_PIPEDAT, 0x00000000); 1322 1323 /* select primitive type that will be drawn (tri's) */ 1324 ACCW(NV10_PIPEADR, 0x00000040); 1325 ACCW(NV10_PIPEDAT, 0x00000005); 1326 1327 ACCW(NV10_PIPEADR, 0x00006400); 1328 ACCW(NV10_PIPEDAT, 0x3f800000); 1329 ACCW(NV10_PIPEDAT, 0x3f800000); 1330 ACCW(NV10_PIPEDAT, 0x4b7fffff); 1331 ACCW(NV10_PIPEDAT, 0x00000000); 1332 1333 ACCW(NV10_PIPEADR, 0x00006410); 1334 ACCW(NV10_PIPEDAT, 0xc5000000); 1335 ACCW(NV10_PIPEDAT, 0xc5000000); 1336 ACCW(NV10_PIPEDAT, 0x00000000); 1337 ACCW(NV10_PIPEDAT, 0x00000000); 1338 1339 ACCW(NV10_PIPEADR, 0x00006420); 1340 ACCW(NV10_PIPEDAT, 0x00000000); 1341 ACCW(NV10_PIPEDAT, 0x00000000); 1342 ACCW(NV10_PIPEDAT, 0x00000000); 1343 ACCW(NV10_PIPEDAT, 0x00000000); 1344 1345 ACCW(NV10_PIPEADR, 0x00006430); 1346 ACCW(NV10_PIPEDAT, 0x00000000); 1347 ACCW(NV10_PIPEDAT, 0x00000000); 1348 ACCW(NV10_PIPEDAT, 0x00000000); 1349 ACCW(NV10_PIPEDAT, 0x00000000); 1350 1351 ACCW(NV10_PIPEADR, 0x000064c0); 1352 ACCW(NV10_PIPEDAT, 0x3f800000); 1353 ACCW(NV10_PIPEDAT, 0x3f800000); 1354 ACCW(NV10_PIPEDAT, 0x477fffff); 1355 ACCW(NV10_PIPEDAT, 0x3f800000); 1356 1357 ACCW(NV10_PIPEADR, 0x000064d0); 1358 ACCW(NV10_PIPEDAT, 0xc5000000); 1359 ACCW(NV10_PIPEDAT, 0xc5000000); 1360 ACCW(NV10_PIPEDAT, 0x00000000); 1361 ACCW(NV10_PIPEDAT, 0x00000000); 1362 1363 ACCW(NV10_PIPEADR, 0x000064e0); 1364 ACCW(NV10_PIPEDAT, 0xc4fff000); 1365 ACCW(NV10_PIPEDAT, 0xc4fff000); 1366 ACCW(NV10_PIPEDAT, 0x00000000); 1367 ACCW(NV10_PIPEDAT, 0x00000000); 1368 1369 ACCW(NV10_PIPEADR, 0x000064f0); 1370 ACCW(NV10_PIPEDAT, 0x00000000); 1371 ACCW(NV10_PIPEDAT, 0x00000000); 1372 ACCW(NV10_PIPEDAT, 0x00000000); 1373 ACCW(NV10_PIPEDAT, 0x00000000); 1374 1375 /* turn lightning on */ 1376 ACCW(NV10_XFMOD0, 0x30000000); 1377 /* set light 1 to infinite type, other lights remain off */ 1378 ACCW(NV10_XFMOD1, 0x00000004); 1379 1380 /* Z-buffer state is: 1381 * initialized, set to: 'fixed point' (integer?); Z-buffer; 16bits depth */ 1382 /* note: 1383 * other options possible are: floating point; 24bits depth; W-buffer */ 1384 ACCW(GLOB_STAT_0, 0x10000000); 1385 /* set DMA instance 2 and 3 to be invalid */ 1386 ACCW(GLOB_STAT_1, 0x00000000); 1387 } 1388 } 1389 1390 static void nv_start_dma(void) 1391 { 1392 uint32 dummy; 1393 1394 if (si->engine.dma.current != si->engine.dma.put) 1395 { 1396 si->engine.dma.put = si->engine.dma.current; 1397 /* flush used caches so we know for sure the DMA cmd buffer received all data. */ 1398 if (si->ps.card_arch < NV40A) 1399 { 1400 /* some CPU's support out-of-order processing (WinChip/Cyrix). Flush them. */ 1401 __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory"); 1402 /* read a non-cached adress to flush the cash */ 1403 dummy = ACCR(STATUS); 1404 } 1405 else 1406 { 1407 /* dummy read the first adress of the framebuffer to flush MTRR-WC buffers */ 1408 dummy = *((volatile uint32 *)(si->framebuffer)); 1409 } 1410 1411 /* actually start DMA to execute all commands now in buffer */ 1412 /* note: 1413 * it doesn't matter which FIFO channel's DMA registers we access, they are in 1414 * fact all the same set. It also doesn't matter if the channel was assigned a 1415 * command or not. */ 1416 /* note also: 1417 * NV_GENERAL_DMAPUT is a write-only register on some cards (confirmed NV11). */ 1418 NV_REG32(NVACC_FIFO + NV_GENERAL_DMAPUT) = (si->engine.dma.put << 2); 1419 } 1420 } 1421 1422 /* this routine does not check the engine's internal hardware FIFO, but the DMA 1423 * command buffer. You can see this as a FIFO as well, that feeds the hardware FIFO. 1424 * The hardware FIFO state is checked by the DMA hardware automatically. */ 1425 static status_t nv_acc_fifofree_dma(uint16 cmd_size) 1426 { 1427 uint32 dmaget; 1428 1429 /* we'd better check for timeouts on the DMA engine as it's theoretically 1430 * breakable by malfunctioning software */ 1431 uint16 cnt = 0; 1432 1433 /* check if the DMA buffer has enough room for the command. 1434 * note: 1435 * engine.dma.free is 'cached' */ 1436 while ((si->engine.dma.free < cmd_size) && (cnt < 10000) && (err < 3)) 1437 { 1438 /* see where the engine is currently fetching from the buffer */ 1439 /* note: 1440 * read this only once in the code as accessing registers is relatively slow */ 1441 /* note also: 1442 * it doesn't matter which FIFO channel's DMA registers we access, they are in 1443 * fact all the same set. It also doesn't matter if the channel was assigned a 1444 * command or not. */ 1445 dmaget = ((NV_REG32(NVACC_FIFO + NV_GENERAL_DMAGET)) >> 2); 1446 1447 /* update timeout counter: on NV11 on a Pentium4 2.8Ghz max reached count 1448 * using BeRoMeter 1.2.6 was about 600; so counting 10000 before generating 1449 * a timeout should definately do it. Snooze()-ing cannot be done without a 1450 * serious speed penalty, even if done for only 1 microSecond. */ 1451 cnt++; 1452 1453 /* where's the engine fetching viewed from us issuing? */ 1454 if (si->engine.dma.put >= dmaget) 1455 { 1456 /* engine is fetching 'behind us', the last piece of the buffer is free */ 1457 1458 /* note the 'updated' free space we have in the DMA buffer */ 1459 si->engine.dma.free = si->engine.dma.max - si->engine.dma.current; 1460 /* if it's enough after all we exit this routine immediately. Else: */ 1461 if (si->engine.dma.free < cmd_size) 1462 { 1463 /* not enough room left, so instruct DMA engine to reset the buffer 1464 * when it's reaching the end of it */ 1465 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x20000000; 1466 /* reset our buffer pointer, so new commands will be placed at the 1467 * beginning of the buffer. */ 1468 si->engine.dma.current = 0; 1469 /* tell the engine to fetch the remaining command(s) in the DMA buffer 1470 * that where not executed before. */ 1471 nv_start_dma(); 1472 1473 /* NOW the engine is fetching 'in front of us', so the first piece 1474 * of the buffer is free */ 1475 1476 /* note the updated current free space we have in the DMA buffer */ 1477 si->engine.dma.free = dmaget - si->engine.dma.current; 1478 /* mind this pittfall: 1479 * Leave some room between where the engine is fetching and where we 1480 * put new commands. Otherwise the engine will crash on heavy loads. 1481 * A crash can be forced best in 640x480x32 mode with BeRoMeter 1.2.6. 1482 * (confirmed on NV11 and NV43 with less than 256 words forced freespace.) 1483 * Note: 1484 * The engine is DMA triggered for fetching chunks every 128 bytes, 1485 * maybe this is the reason for this behaviour. 1486 * Note also: 1487 * it looks like the space that needs to be kept free is coupled 1488 * with the size of the DMA buffer. */ 1489 if (si->engine.dma.free < 256) 1490 si->engine.dma.free = 0; 1491 else 1492 si->engine.dma.free -= 256; 1493 } 1494 } 1495 else 1496 { 1497 /* engine is fetching 'in front of us', so the first piece of the buffer 1498 * is free */ 1499 1500 /* note the updated current free space we have in the DMA buffer */ 1501 si->engine.dma.free = dmaget - si->engine.dma.current; 1502 /* mind this pittfall: 1503 * Leave some room between where the engine is fetching and where we 1504 * put new commands. Otherwise the engine will crash on heavy loads. 1505 * A crash can be forced best in 640x480x32 mode with BeRoMeter 1.2.6. 1506 * (confirmed on NV11 and NV43 with less than 256 words forced freespace.) 1507 * Note: 1508 * The engine is DMA triggered for fetching chunks every 128 bytes, 1509 * maybe this is the reason for this behaviour. 1510 * Note also: 1511 * it looks like the space that needs to be kept free is coupled 1512 * with the size of the DMA buffer. */ 1513 if (si->engine.dma.free < 256) 1514 si->engine.dma.free = 0; 1515 else 1516 si->engine.dma.free -= 256; 1517 } 1518 } 1519 1520 /* log timeout if we had one */ 1521 if (cnt == 10000) 1522 { 1523 if (err < 3) err++; 1524 LOG(4,("ACC_DMA: fifofree; DMA timeout #%d, engine trouble!\n", err)); 1525 } 1526 1527 /* we must make the acceleration routines abort or the driver will hang! */ 1528 if (err >= 3) return B_ERROR; 1529 1530 return B_OK; 1531 } 1532 1533 static void nv_acc_cmd_dma(uint32 cmd, uint16 offset, uint16 size) 1534 { 1535 /* NV_FIFO_DMA_OPCODE: set number of cmd words (b18 - 28); set FIFO offset for 1536 * first cmd word (b2 - 15); set DMA opcode = method (b29 - 31). 1537 * a 'NOP' is the opcode word $00000000. */ 1538 /* note: 1539 * possible DMA opcodes: 1540 * b'000' is 'method' (execute cmd); 1541 * b'001' is 'jump'; 1542 * b'002' is 'noninc method' (execute buffer wrap-around); 1543 * b'003' is 'call': return is executed by opcode word $00020000 (b17 = 1). */ 1544 /* note also: 1545 * this system uses auto-increments for the FIFO offset adresses. Make sure 1546 * to set a new adress if a gap exists between the previous one and the new one. */ 1547 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = ((size << 18) | 1548 ((si->engine.fifo.ch_ptr[cmd] + offset) & 0x0000fffc)); 1549 1550 /* space left after issuing the current command is the cmd AND it's arguments less */ 1551 si->engine.dma.free -= (size + 1); 1552 } 1553 1554 static void nv_acc_set_ch_dma(uint16 ch, uint32 handle) 1555 { 1556 /* issue FIFO channel assign cmd */ 1557 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = ((1 << 18) | ch); 1558 /* set new assignment */ 1559 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = (0x80000000 | handle); 1560 1561 /* space left after issuing the current command is the cmd AND it's arguments less */ 1562 si->engine.dma.free -= 2; 1563 } 1564 1565 /* note: 1566 * switching fifo channel assignments this way has no noticable slowdown: 1567 * measured 0.2% with Quake2. */ 1568 void nv_acc_assert_fifo_dma(void) 1569 { 1570 /* does every engine cmd this accelerant needs have a FIFO channel? */ 1571 //fixme: can probably be optimized for both speed and channel selection... 1572 if (!si->engine.fifo.ch_ptr[NV_ROP5_SOLID] || 1573 !si->engine.fifo.ch_ptr[NV_IMAGE_BLACK_RECTANGLE] || 1574 !si->engine.fifo.ch_ptr[NV_IMAGE_PATTERN] || 1575 !si->engine.fifo.ch_ptr[NV4_SURFACE] || 1576 !si->engine.fifo.ch_ptr[NV_IMAGE_BLIT] || 1577 !si->engine.fifo.ch_ptr[NV4_GDI_RECTANGLE_TEXT] || 1578 !si->engine.fifo.ch_ptr[NV_SCALED_IMAGE_FROM_MEMORY]) 1579 { 1580 uint16 cnt; 1581 1582 /* free the FIFO channels we want from the currently assigned cmd's */ 1583 si->engine.fifo.ch_ptr[si->engine.fifo.handle[0]] = 0; 1584 si->engine.fifo.ch_ptr[si->engine.fifo.handle[1]] = 0; 1585 si->engine.fifo.ch_ptr[si->engine.fifo.handle[2]] = 0; 1586 si->engine.fifo.ch_ptr[si->engine.fifo.handle[3]] = 0; 1587 si->engine.fifo.ch_ptr[si->engine.fifo.handle[4]] = 0; 1588 si->engine.fifo.ch_ptr[si->engine.fifo.handle[5]] = 0; 1589 si->engine.fifo.ch_ptr[si->engine.fifo.handle[6]] = 0; 1590 1591 /* set new object handles */ 1592 si->engine.fifo.handle[0] = NV_ROP5_SOLID; 1593 si->engine.fifo.handle[1] = NV_IMAGE_BLACK_RECTANGLE; 1594 si->engine.fifo.handle[2] = NV_IMAGE_PATTERN; 1595 si->engine.fifo.handle[3] = NV4_SURFACE; 1596 si->engine.fifo.handle[4] = NV_IMAGE_BLIT; 1597 si->engine.fifo.handle[5] = NV4_GDI_RECTANGLE_TEXT; 1598 si->engine.fifo.handle[6] = NV_SCALED_IMAGE_FROM_MEMORY; 1599 1600 /* set handle's pointers to their assigned FIFO channels */ 1601 /* note: 1602 * b0-1 aren't used as adressbits. Using b0 to indicate a valid pointer. */ 1603 for (cnt = 0; cnt < 0x08; cnt++) 1604 { 1605 si->engine.fifo.ch_ptr[(si->engine.fifo.handle[cnt])] = 1606 (0x00000001 + (cnt * 0x00002000)); 1607 } 1608 1609 /* wait for room in fifo for new FIFO assigment cmds if needed. */ 1610 if (nv_acc_fifofree_dma(14) != B_OK) return; 1611 1612 /* program new FIFO assignments */ 1613 /* Raster OPeration: */ 1614 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH0, si->engine.fifo.handle[0]); 1615 /* Clip: */ 1616 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH1, si->engine.fifo.handle[1]); 1617 /* Pattern: */ 1618 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH2, si->engine.fifo.handle[2]); 1619 /* 2D Surface: */ 1620 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH3, si->engine.fifo.handle[3]); 1621 /* Blit: */ 1622 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH4, si->engine.fifo.handle[4]); 1623 /* Bitmap: */ 1624 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH5, si->engine.fifo.handle[5]); 1625 /* Scaled and fitered Blit: */ 1626 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH6, si->engine.fifo.handle[6]); 1627 1628 /* tell the engine to fetch and execute all (new) commands in the DMA buffer */ 1629 nv_start_dma(); 1630 } 1631 } 1632 1633 /* 1634 note: 1635 moved acceleration 'top-level' routines to be integrated in the engine: 1636 it is costly to call the engine for every single function within a loop! 1637 (measured with BeRoMeter 1.2.6: upto 15% speed increase on all CPU's.) 1638 1639 note also: 1640 splitting up each command list into sublists (see routines below) prevents 1641 a lot more nested calls, further increasing the speed with upto 70%. 1642 1643 finally: 1644 sending the sublist to just one single engine command even further increases 1645 speed with upto another 10%. This can't be done for blits though, as this engine- 1646 command's hardware does not support multiple objects. 1647 */ 1648 1649 /* screen to screen blit - i.e. move windows around and scroll within them. */ 1650 void SCREEN_TO_SCREEN_BLIT_DMA(engine_token *et, blit_params *list, uint32 count) 1651 { 1652 uint32 i = 0; 1653 uint16 subcnt; 1654 1655 /*** init acc engine for blit function ***/ 1656 /* ROP registers (Raster OPeration): 1657 * wait for room in fifo for ROP cmd if needed. */ 1658 if (nv_acc_fifofree_dma(2) != B_OK) return; 1659 /* now setup ROP (writing 2 32bit words) for GXcopy */ 1660 nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); 1661 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xcc; /* SetRop5 */ 1662 1663 /*** do each blit ***/ 1664 /* Note: 1665 * blit-copy direction is determined inside nvidia hardware: no setup needed */ 1666 while (count) 1667 { 1668 /* break up the list in sublists to minimize calls, while making sure long 1669 * lists still get executed without trouble */ 1670 subcnt = 32; 1671 if (count < 32) subcnt = count; 1672 count -= subcnt; 1673 1674 /* wait for room in fifo for blit cmd if needed. */ 1675 if (nv_acc_fifofree_dma(4 * subcnt) != B_OK) return; 1676 1677 while (subcnt--) 1678 { 1679 /* now setup blit (writing 4 32bit words) */ 1680 nv_acc_cmd_dma(NV_IMAGE_BLIT, NV_IMAGE_BLIT_SOURCEORG, 3); 1681 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1682 (((list[i].src_top) << 16) | (list[i].src_left)); /* SourceOrg */ 1683 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1684 (((list[i].dest_top) << 16) | (list[i].dest_left)); /* DestOrg */ 1685 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1686 ((((list[i].height) + 1) << 16) | ((list[i].width) + 1)); /* HeightWidth */ 1687 1688 i++; 1689 } 1690 1691 /* tell the engine to fetch the commands in the DMA buffer that where not 1692 * executed before. */ 1693 nv_start_dma(); 1694 } 1695 1696 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 1697 si->engine.threeD.reload = 0xffffffff; 1698 } 1699 1700 /* scaled and filtered screen to screen blit - i.e. video playback without overlay */ 1701 /* note: source and destination may not overlap. */ 1702 //fixme? checkout NV5 and NV10 version of cmd: faster?? (or is 0x77 a 'autoselect' version?) 1703 void SCREEN_TO_SCREEN_SCALED_FILTERED_BLIT_DMA(engine_token *et, scaled_blit_params *list, uint32 count) 1704 { 1705 uint32 i = 0; 1706 uint16 subcnt; 1707 uint32 cmd_depth; 1708 uint8 bpp; 1709 1710 /*** init acc engine for scaled filtered blit function ***/ 1711 /* Set pixel width */ 1712 switch(si->dm.space) 1713 { 1714 case B_RGB15_LITTLE: 1715 cmd_depth = 0x00000002; 1716 bpp = 2; 1717 break; 1718 case B_RGB16_LITTLE: 1719 cmd_depth = 0x00000007; 1720 bpp = 2; 1721 break; 1722 case B_RGB32_LITTLE: 1723 case B_RGBA32_LITTLE: 1724 cmd_depth = 0x00000004; 1725 bpp = 4; 1726 break; 1727 /* fixme sometime: 1728 * we could do the spaces below if this function would be modified to be able 1729 * to use a source outside of the desktop, i.e. using offscreen bitmaps... */ 1730 case B_YCbCr422: 1731 cmd_depth = 0x00000005; 1732 bpp = 2; 1733 break; 1734 case B_YUV422: 1735 cmd_depth = 0x00000006; 1736 bpp = 2; 1737 break; 1738 default: 1739 /* note: this function does not support src or dest in the B_CMAP8 space! */ 1740 //fixme: the NV10 version of this cmd supports B_CMAP8 src though... (checkout) 1741 LOG(8,("ACC_DMA: scaled_filtered_blit, invalid bit depth\n")); 1742 return; 1743 } 1744 1745 /* modify surface depth settings for 15-bit colorspace so command works as intended */ 1746 if (si->dm.space == B_RGB15_LITTLE) 1747 { 1748 /* wait for room in fifo for surface setup cmd if needed */ 1749 if (nv_acc_fifofree_dma(2) != B_OK) return; 1750 /* now setup 2D surface (writing 1 32bit word) */ 1751 nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 1); 1752 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000002; /* Format */ 1753 } 1754 1755 /* TNT1 has fixed operation mode 'SRCcopy' while the rest can be programmed: */ 1756 if (si->ps.card_type != NV04) 1757 { 1758 /* wait for room in fifo for cmds if needed. */ 1759 if (nv_acc_fifofree_dma(5) != B_OK) return; 1760 /* now setup source bitmap colorspace */ 1761 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SETCOLORFORMAT, 2); 1762 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 1763 /* now setup operation mode to SRCcopy */ 1764 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000003; /* SetOperation */ 1765 } 1766 else 1767 { 1768 /* wait for room in fifo for cmd if needed. */ 1769 if (nv_acc_fifofree_dma(4) != B_OK) return; 1770 /* now setup source bitmap colorspace */ 1771 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SETCOLORFORMAT, 1); 1772 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 1773 /* TNT1 has fixed operation mode SRCcopy */ 1774 } 1775 /* now setup fill color (writing 2 32bit words) */ 1776 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); 1777 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000000; /* Color1A */ 1778 1779 /*** do each blit ***/ 1780 while (count) 1781 { 1782 /* break up the list in sublists to minimize calls, while making sure long 1783 * lists still get executed without trouble */ 1784 subcnt = 16; 1785 if (count < 16) subcnt = count; 1786 count -= subcnt; 1787 1788 /* wait for room in fifo for blit cmd if needed. */ 1789 if (nv_acc_fifofree_dma(12 * subcnt) != B_OK) return; 1790 1791 while (subcnt--) 1792 { 1793 /* now setup blit (writing 12 32bit words) */ 1794 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCEORG, 6); 1795 /* setup dest clipping ref for blit (not used) (b0-15 = left, b16-31 = top) */ 1796 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0; /* SourceOrg */ 1797 /* setup dest clipping size for blit */ 1798 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1799 (((list[i].dest_height + 1) << 16) | (list[i].dest_width + 1)); /* SourceHeightWidth */ 1800 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1801 /* setup destination location and size for blit */ 1802 (((list[i].dest_top) << 16) | (list[i].dest_left)); /* DestOrg */ 1803 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1804 (((list[i].dest_height + 1) << 16) | (list[i].dest_width + 1)); /* DestHeightWidth */ 1805 //fixme: findout scaling limits... (although the current cmd interface doesn't support them.) 1806 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1807 (((list[i].src_width + 1) << 20) / (list[i].dest_width + 1)); /* HorInvScale (in 12.20 format) */ 1808 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1809 (((list[i].src_height + 1) << 20) / (list[i].dest_height + 1)); /* VerInvScale (in 12.20 format) */ 1810 1811 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCESIZE, 4); 1812 /* setup horizontal and vertical source (fetching) ends. 1813 * note: 1814 * horizontal granularity is 2 pixels, vertical granularity is 1 pixel. 1815 * look at Matrox or Neomagic bes engines code for usage example. */ 1816 //fixme: tested 15, 16 and 32-bit RGB depth, verify other depths... 1817 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1818 (((list[i].src_height + 1) << 16) | 1819 (((list[i].src_width + 1) + 0x0001) & ~0x0001)); /* SourceHeightWidth */ 1820 /* setup source pitch (b0-15). Set 'format origin center' (b16-17) and 1821 * select 'format interpolator foh (bilinear filtering)' (b24). */ 1822 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1823 (si->fbc.bytes_per_row | (1 << 16) | (1 << 24)); /* SourcePitch */ 1824 /* setup source surface location */ 1825 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1826 ((uint32)((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)) + 1827 (list[i].src_top * si->fbc.bytes_per_row) + (list[i].src_left * bpp); /* Offset */ 1828 /* setup source start: first (sub)pixel contributing to output picture */ 1829 /* note: 1830 * clipping is not asked for. 1831 * look at nVidia NV10+ bes engine code for useage example. */ 1832 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1833 0; /* SourceRef (b0-15 = hor, b16-31 = ver: both in 12.4 format) */ 1834 1835 i++; 1836 } 1837 1838 /* tell the engine to fetch the commands in the DMA buffer that where not 1839 * executed before. */ 1840 nv_start_dma(); 1841 } 1842 1843 /* reset surface depth settings so the other engine commands works as intended */ 1844 if (si->dm.space == B_RGB15_LITTLE) 1845 { 1846 /* wait for room in fifo for surface setup cmd if needed */ 1847 if (nv_acc_fifofree_dma(2) != B_OK) return; 1848 /* now setup 2D surface (writing 1 32bit word) */ 1849 nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 1); 1850 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000004; /* Format */ 1851 1852 /* tell the engine to fetch the commands in the DMA buffer that where not 1853 * executed before. */ 1854 nv_start_dma(); 1855 } 1856 1857 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 1858 si->engine.threeD.reload = 0xffffffff; 1859 } 1860 1861 /* rectangle fill - i.e. workspace and window background color */ 1862 void FILL_RECTANGLE_DMA(engine_token *et, uint32 colorIndex, fill_rect_params *list, uint32 count) 1863 { 1864 uint32 i = 0; 1865 uint16 subcnt; 1866 1867 /*** init acc engine for fill function ***/ 1868 /* ROP registers (Raster OPeration): 1869 * wait for room in fifo for ROP and bitmap cmd if needed. */ 1870 if (nv_acc_fifofree_dma(4) != B_OK) return; 1871 /* now setup ROP (writing 2 32bit words) for GXcopy */ 1872 nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); 1873 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xcc; /* SetRop5 */ 1874 /* now setup fill color (writing 2 32bit words) */ 1875 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); 1876 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = colorIndex; /* Color1A */ 1877 1878 /*** draw each rectangle ***/ 1879 while (count) 1880 { 1881 /* break up the list in sublists to minimize calls, while making sure long 1882 * lists still get executed without trouble */ 1883 subcnt = 32; 1884 if (count < 32) subcnt = count; 1885 count -= subcnt; 1886 1887 /* wait for room in fifo for bitmap cmd if needed. */ 1888 if (nv_acc_fifofree_dma(1 + (2 * subcnt)) != B_OK) return; 1889 1890 /* issue fill command once... */ 1891 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, (2 * subcnt)); 1892 /* ... and send multiple rects (engine cmd supports 32 max) */ 1893 while (subcnt--) 1894 { 1895 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1896 (((list[i].left) << 16) | ((list[i].top) & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */ 1897 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1898 (((((list[i].right)+1) - (list[i].left)) << 16) | 1899 (((list[i].bottom-list[i].top)+1) & 0x0000ffff)); /* Unclipped Rect 0 WidthHeight */ 1900 1901 i++; 1902 } 1903 1904 /* tell the engine to fetch the commands in the DMA buffer that where not 1905 * executed before. */ 1906 nv_start_dma(); 1907 } 1908 1909 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 1910 si->engine.threeD.reload = 0xffffffff; 1911 } 1912 1913 /* span fill - i.e. (selected) menuitem background color (Dano) */ 1914 void FILL_SPAN_DMA(engine_token *et, uint32 colorIndex, uint16 *list, uint32 count) 1915 { 1916 uint32 i = 0; 1917 uint16 subcnt; 1918 1919 /*** init acc engine for fill function ***/ 1920 /* ROP registers (Raster OPeration): 1921 * wait for room in fifo for ROP and bitmap cmd if needed. */ 1922 if (nv_acc_fifofree_dma(4) != B_OK) return; 1923 /* now setup ROP (writing 2 32bit words) for GXcopy */ 1924 nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); 1925 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xcc; /* SetRop5 */ 1926 /* now setup fill color (writing 2 32bit words) */ 1927 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); 1928 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = colorIndex; /* Color1A */ 1929 1930 /*** draw each span ***/ 1931 while (count) 1932 { 1933 /* break up the list in sublists to minimize calls, while making sure long 1934 * lists still get executed without trouble */ 1935 subcnt = 32; 1936 if (count < 32) subcnt = count; 1937 count -= subcnt; 1938 1939 /* wait for room in fifo for bitmap cmd if needed. */ 1940 if (nv_acc_fifofree_dma(1 + (2 * subcnt)) != B_OK) return; 1941 1942 /* issue fill command once... */ 1943 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, (2 * subcnt)); 1944 /* ... and send multiple rects (spans) (engine cmd supports 32 max) */ 1945 while (subcnt--) 1946 { 1947 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1948 (((list[i+1]) << 16) | ((list[i]) & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */ 1949 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1950 ((((list[i+2]+1) - (list[i+1])) << 16) | 0x00000001); /* Unclipped Rect 0 WidthHeight */ 1951 1952 i+=3; 1953 } 1954 1955 /* tell the engine to fetch the commands in the DMA buffer that where not 1956 * executed before. */ 1957 nv_start_dma(); 1958 } 1959 1960 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 1961 si->engine.threeD.reload = 0xffffffff; 1962 } 1963 1964 /* rectangle invert - i.e. text cursor and text selection */ 1965 void INVERT_RECTANGLE_DMA(engine_token *et, fill_rect_params *list, uint32 count) 1966 { 1967 uint32 i = 0; 1968 uint16 subcnt; 1969 1970 /*** init acc engine for invert function ***/ 1971 /* ROP registers (Raster OPeration): 1972 * wait for room in fifo for ROP and bitmap cmd if needed. */ 1973 if (nv_acc_fifofree_dma(4) != B_OK) return; 1974 /* now setup ROP (writing 2 32bit words) for GXinvert */ 1975 nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); 1976 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x55; /* SetRop5 */ 1977 /* now reset fill color (writing 2 32bit words) */ 1978 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); 1979 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000000; /* Color1A */ 1980 1981 /*** invert each rectangle ***/ 1982 while (count) 1983 { 1984 /* break up the list in sublists to minimize calls, while making sure long 1985 * lists still get executed without trouble */ 1986 subcnt = 32; 1987 if (count < 32) subcnt = count; 1988 count -= subcnt; 1989 1990 /* wait for room in fifo for bitmap cmd if needed. */ 1991 if (nv_acc_fifofree_dma(1 + (2 * subcnt)) != B_OK) return; 1992 1993 /* issue fill command once... */ 1994 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, (2 * subcnt)); 1995 /* ... and send multiple rects (engine cmd supports 32 max) */ 1996 while (subcnt--) 1997 { 1998 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1999 (((list[i].left) << 16) | ((list[i].top) & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */ 2000 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2001 (((((list[i].right)+1) - (list[i].left)) << 16) | 2002 (((list[i].bottom-list[i].top)+1) & 0x0000ffff)); /* Unclipped Rect 0 WidthHeight */ 2003 2004 i++; 2005 } 2006 2007 /* tell the engine to fetch the commands in the DMA buffer that where not 2008 * executed before. */ 2009 nv_start_dma(); 2010 } 2011 2012 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 2013 si->engine.threeD.reload = 0xffffffff; 2014 } 2015