1 /* NV Acceleration functions */ 2 3 /* Author: 4 Rudolf Cornelissen 8/2003-2/2006. 5 6 This code was possible thanks to: 7 - the Linux XFree86 NV driver, 8 - the Linux UtahGLX 3D driver. 9 */ 10 11 #define MODULE_BIT 0x00080000 12 13 #include "nv_std.h" 14 15 /*acceleration notes*/ 16 17 /*functions Be's app_server uses: 18 fill span (horizontal only) 19 fill rectangle (these 2 are very similar) 20 invert rectangle 21 blit 22 */ 23 24 static void nv_init_for_3D_dma(void); 25 static void nv_start_dma(void); 26 static status_t nv_acc_fifofree_dma(uint16 cmd_size); 27 static void nv_acc_cmd_dma(uint32 cmd, uint16 offset, uint16 size); 28 static void nv_acc_set_ch_dma(uint16 ch, uint32 handle); 29 30 /* used to track engine DMA stalls */ 31 static uint8 err; 32 33 /* wait until engine completely idle */ 34 status_t nv_acc_wait_idle_dma() 35 { 36 /* we'd better check for timeouts on the DMA engine as it's theoretically 37 * breakable by malfunctioning software */ 38 uint16 cnt = 0; 39 40 /* wait until all upcoming commands are in execution at least. Do this until 41 * we hit a timeout; abort if we failed at least three times before: 42 * if DMA stalls, we have to forget about it alltogether at some point, or 43 * the system will almost come to a complete halt.. */ 44 /* note: 45 * it doesn't matter which FIFO channel's DMA registers we access, they are in 46 * fact all the same set. It also doesn't matter if the channel was assigned a 47 * command or not. */ 48 while ((NV_REG32(NVACC_FIFO + NV_GENERAL_DMAGET) != (si->engine.dma.put << 2)) && 49 (cnt < 10000) && (err < 3)) 50 { 51 /* snooze a bit so I do not hammer the bus */ 52 snooze (100); 53 cnt++; 54 } 55 56 /* log timeout if we had one */ 57 if (cnt == 10000) 58 { 59 if (err < 3) err++; 60 LOG(4,("ACC_DMA: wait_idle; DMA timeout #%d, engine trouble!\n", err)); 61 } 62 63 /* wait until execution completed */ 64 while (ACCR(STATUS)) 65 { 66 /* snooze a bit so I do not hammer the bus */ 67 snooze (100); 68 } 69 70 return B_OK; 71 } 72 73 /* AFAIK this must be done for every new screenmode. 74 * Engine required init. */ 75 status_t nv_acc_init_dma() 76 { 77 uint32 cnt, tmp; 78 uint32 surf_depth, cmd_depth; 79 /* reset the engine DMA stalls counter */ 80 err = 0; 81 82 /* a hanging engine only recovers from a complete power-down/power-up cycle */ 83 NV_REG32(NV32_PWRUPCTRL) = 0x13110011; 84 snooze(1000); 85 NV_REG32(NV32_PWRUPCTRL) = 0x13111111; 86 87 /* don't try this on NV20 and later.. */ 88 /* note: 89 * the specific register that's responsible for the speedfix on NV18 is 90 * $00400ed8: bit 6 needs to be zero for fastest rendering (confirmed). */ 91 /* note also: 92 * on NV28 the following ranges could be reset (confirmed): 93 * $00400000 upto/incl. $004002fc; 94 * $00400400 upto/incl. $004017fc; 95 * $0040180c upto/incl. $00401948; 96 * $00401994 upto/incl. $00401a80; 97 * $00401a94 upto/incl. $00401ffc. 98 * The intermediate ranges hang the engine upon resetting. */ 99 if (si->ps.card_arch < NV20A) 100 { 101 /* actively reset the PGRAPH registerset (acceleration engine) */ 102 for (cnt = 0x00400000; cnt < 0x00402000; cnt +=4) 103 { 104 NV_REG32(cnt) = 0x00000000; 105 } 106 } 107 108 /* setup PTIMER: */ 109 //fixme? how about NV28 setup as just after coldstarting? (see nv_info.c) 110 /* set timer numerator to 8 (in b0-15) */ 111 ACCW(PT_NUMERATOR, 0x00000008); 112 /* set timer denominator to 3 (in b0-15) */ 113 ACCW(PT_DENOMINATR, 0x00000003); 114 115 /* disable timer-alarm INT requests (b0) */ 116 ACCW(PT_INTEN, 0x00000000); 117 /* reset timer-alarm INT status bit (b0) */ 118 ACCW(PT_INTSTAT, 0xffffffff); 119 120 /* enable PRAMIN write access on pre NV10 before programming it! */ 121 if (si->ps.card_arch == NV04A) 122 { 123 /* set framebuffer config: type = notiling, PRAMIN write access enabled */ 124 NV_REG32(NV32_PFB_CONFIG_0) = 0x00001114; 125 } 126 else 127 { 128 /* setup acc engine 'source' tile adressranges */ 129 if ((si->ps.card_type <= NV40) || (si->ps.card_type == NV45)) 130 { 131 ACCW(NV10_FBTIL0AD, 0); 132 ACCW(NV10_FBTIL1AD, 0); 133 ACCW(NV10_FBTIL2AD, 0); 134 ACCW(NV10_FBTIL3AD, 0); 135 ACCW(NV10_FBTIL4AD, 0); 136 ACCW(NV10_FBTIL5AD, 0); 137 ACCW(NV10_FBTIL6AD, 0); 138 ACCW(NV10_FBTIL7AD, 0); 139 ACCW(NV10_FBTIL0ED, (si->ps.memory_size - 1)); 140 ACCW(NV10_FBTIL1ED, (si->ps.memory_size - 1)); 141 ACCW(NV10_FBTIL2ED, (si->ps.memory_size - 1)); 142 ACCW(NV10_FBTIL3ED, (si->ps.memory_size - 1)); 143 ACCW(NV10_FBTIL4ED, (si->ps.memory_size - 1)); 144 ACCW(NV10_FBTIL5ED, (si->ps.memory_size - 1)); 145 ACCW(NV10_FBTIL6ED, (si->ps.memory_size - 1)); 146 ACCW(NV10_FBTIL7ED, (si->ps.memory_size - 1)); 147 } 148 else 149 { 150 /* NV41, 43, 44, 47 */ 151 ACCW(NV41_FBTIL0AD, 0); 152 ACCW(NV41_FBTIL1AD, 0); 153 ACCW(NV41_FBTIL2AD, 0); 154 ACCW(NV41_FBTIL3AD, 0); 155 ACCW(NV41_FBTIL4AD, 0); 156 ACCW(NV41_FBTIL5AD, 0); 157 ACCW(NV41_FBTIL6AD, 0); 158 ACCW(NV41_FBTIL7AD, 0); 159 ACCW(NV41_FBTIL8AD, 0); 160 ACCW(NV41_FBTIL9AD, 0); 161 ACCW(NV41_FBTILAAD, 0); 162 ACCW(NV41_FBTILBAD, 0); 163 ACCW(NV41_FBTIL0ED, (si->ps.memory_size - 1)); 164 ACCW(NV41_FBTIL1ED, (si->ps.memory_size - 1)); 165 ACCW(NV41_FBTIL2ED, (si->ps.memory_size - 1)); 166 ACCW(NV41_FBTIL3ED, (si->ps.memory_size - 1)); 167 ACCW(NV41_FBTIL4ED, (si->ps.memory_size - 1)); 168 ACCW(NV41_FBTIL5ED, (si->ps.memory_size - 1)); 169 ACCW(NV41_FBTIL6ED, (si->ps.memory_size - 1)); 170 ACCW(NV41_FBTIL7ED, (si->ps.memory_size - 1)); 171 ACCW(NV41_FBTIL8ED, (si->ps.memory_size - 1)); 172 ACCW(NV41_FBTIL9ED, (si->ps.memory_size - 1)); 173 ACCW(NV41_FBTILAED, (si->ps.memory_size - 1)); 174 ACCW(NV41_FBTILBED, (si->ps.memory_size - 1)); 175 176 if (si->ps.card_type == NV47) 177 /* or ID == 0x01dx or ID == 0x029x: but no cards defined yet */ 178 { 179 ACCW(NV47_FBTILCAD, 0); 180 ACCW(NV47_FBTILDAD, 0); 181 ACCW(NV47_FBTILEAD, 0); 182 ACCW(NV47_FBTILCED, (si->ps.memory_size - 1)); 183 ACCW(NV47_FBTILDED, (si->ps.memory_size - 1)); 184 ACCW(NV47_FBTILEED, (si->ps.memory_size - 1)); 185 } 186 } 187 } 188 189 /*** PRAMIN ***/ 190 /* first clear the entire RAMHT (hash-table) space to a defined state. It turns 191 * out at least NV11 will keep the previously programmed handles over resets and 192 * power-outages upto about 15 seconds!! Faulty entries might well hang the 193 * engine (confirmed on NV11). 194 * Note: 195 * this behaviour is not very strange: even very old DRAM chips are known to be 196 * able to do this, even though you should refresh them every few milliseconds or 197 * so. (Large memory cell capacitors, though different cells vary a lot in their 198 * capacity.) 199 * Of course data validity is not certain by a long shot over this large 200 * amount of time.. */ 201 for(cnt = 0; cnt < 0x0400; cnt++) 202 NV_REG32(NVACC_HT_HANDL_00 + (cnt << 2)) = 0; 203 /* RAMHT (hash-table) space SETUP FIFO HANDLES */ 204 /* note: 205 * 'instance' tells you where the engine command is stored in 'PR_CTXx_x' sets 206 * below: instance being b4-19 with baseadress NV_PRAMIN_CTX_0 (0x00700000). 207 * That command is linked to the handle noted here. This handle is then used to 208 * tell the FIFO to which engine command it is connected! 209 * (CTX registers are actually a sort of RAM space.) */ 210 if (si->ps.card_arch >= NV40A) 211 { 212 /* (first set) */ 213 ACCW(HT_HANDL_00, (0x80000000 | NV10_CONTEXT_SURFACES_2D)); /* 32bit handle (not used) */ 214 ACCW(HT_VALUE_00, 0x0010114c); /* instance $114c, engine = acc engine, CHID = $00 */ 215 216 ACCW(HT_HANDL_01, (0x80000000 | NV_IMAGE_BLIT)); /* 32bit handle */ 217 ACCW(HT_VALUE_01, 0x00101148); /* instance $1148, engine = acc engine, CHID = $00 */ 218 219 ACCW(HT_HANDL_02, (0x80000000 | NV4_GDI_RECTANGLE_TEXT)); /* 32bit handle */ 220 ACCW(HT_VALUE_02, 0x0010114a); /* instance $114a, engine = acc engine, CHID = $00 */ 221 222 /* (second set) */ 223 ACCW(HT_HANDL_10, (0x80000000 | NV_ROP5_SOLID)); /* 32bit handle */ 224 ACCW(HT_VALUE_10, 0x00101142); /* instance $1142, engine = acc engine, CHID = $00 */ 225 226 ACCW(HT_HANDL_11, (0x80000000 | NV_IMAGE_BLACK_RECTANGLE)); /* 32bit handle */ 227 ACCW(HT_VALUE_11, 0x00101144); /* instance $1144, engine = acc engine, CHID = $00 */ 228 229 ACCW(HT_HANDL_12, (0x80000000 | NV_IMAGE_PATTERN)); /* 32bit handle */ 230 ACCW(HT_VALUE_12, 0x00101146); /* instance $1146, engine = acc engine, CHID = $00 */ 231 232 ACCW(HT_HANDL_13, (0x80000000 | NV_SCALED_IMAGE_FROM_MEMORY)); /* 32bit handle */ 233 ACCW(HT_VALUE_13, 0x0010114e); /* instance $114e, engine = acc engine, CHID = $00 */ 234 } 235 else 236 { 237 /* (first set) */ 238 ACCW(HT_HANDL_00, (0x80000000 | NV4_SURFACE)); /* 32bit handle */ 239 ACCW(HT_VALUE_00, 0x80011145); /* instance $1145, engine = acc engine, CHID = $00 */ 240 241 ACCW(HT_HANDL_01, (0x80000000 | NV_IMAGE_BLIT)); /* 32bit handle */ 242 ACCW(HT_VALUE_01, 0x80011146); /* instance $1146, engine = acc engine, CHID = $00 */ 243 244 ACCW(HT_HANDL_02, (0x80000000 | NV4_GDI_RECTANGLE_TEXT)); /* 32bit handle */ 245 ACCW(HT_VALUE_02, 0x80011147); /* instance $1147, engine = acc engine, CHID = $00 */ 246 247 ACCW(HT_HANDL_03, (0x80000000 | NV4_CONTEXT_SURFACES_ARGB_ZS)); /* 32bit handle (3D) */ 248 ACCW(HT_VALUE_03, 0x80011148); /* instance $1148, engine = acc engine, CHID = $00 */ 249 250 /* NV4_ and NV10_DX5_TEXTURE_TRIANGLE should be identical */ 251 ACCW(HT_HANDL_04, (0x80000000 | NV4_DX5_TEXTURE_TRIANGLE)); /* 32bit handle (3D) */ 252 ACCW(HT_VALUE_04, 0x80011149); /* instance $1149, engine = acc engine, CHID = $00 */ 253 254 /* NV4_ and NV10_DX6_MULTI_TEXTURE_TRIANGLE should be identical */ 255 ACCW(HT_HANDL_05, (0x80000000 | NV4_DX6_MULTI_TEXTURE_TRIANGLE)); /* 32bit handle (not used) */ 256 ACCW(HT_VALUE_05, 0x8001114a); /* instance $114a, engine = acc engine, CHID = $00 */ 257 258 ACCW(HT_HANDL_06, (0x80000000 | NV1_RENDER_SOLID_LIN)); /* 32bit handle (not used) */ 259 ACCW(HT_VALUE_06, 0x8001114c); /* instance $114c, engine = acc engine, CHID = $00 */ 260 261 /* (second set) */ 262 ACCW(HT_HANDL_10, (0x80000000 | NV_ROP5_SOLID)); /* 32bit handle */ 263 ACCW(HT_VALUE_10, 0x80011142); /* instance $1142, engine = acc engine, CHID = $00 */ 264 265 ACCW(HT_HANDL_11, (0x80000000 | NV_IMAGE_BLACK_RECTANGLE)); /* 32bit handle */ 266 ACCW(HT_VALUE_11, 0x80011143); /* instance $1143, engine = acc engine, CHID = $00 */ 267 268 ACCW(HT_HANDL_12, (0x80000000 | NV_IMAGE_PATTERN)); /* 32bit handle */ 269 ACCW(HT_VALUE_12, 0x80011144); /* instance $1144, engine = acc engine, CHID = $00 */ 270 271 ACCW(HT_HANDL_13, (0x80000000 | NV_SCALED_IMAGE_FROM_MEMORY)); /* 32bit handle */ 272 ACCW(HT_VALUE_13, 0x8001114b); /* instance $114b, engine = acc engine, CHID = $00 */ 273 } 274 275 /* program CTX registers: CTX1 is mostly done later (colorspace dependant) */ 276 /* note: 277 * CTX determines which HT handles point to what engine commands. */ 278 /* note also: 279 * CTX registers are in fact in the same GPU internal RAM space as the engine's 280 * hashtable. This means that stuff programmed in here also survives resets and 281 * power-outages! (confirmed NV11) */ 282 if (si->ps.card_arch >= NV40A) 283 { 284 /* setup a DMA define for use by command defines below. */ 285 ACCW(PR_CTX0_R, 0x00003000); /* DMA page table present and of linear type; 286 * DMA target node is NVM (non-volatile memory?) 287 * (instead of doing PCI or AGP transfers) */ 288 ACCW(PR_CTX1_R, (si->ps.memory_size - 1)); /* DMA limit: size is all cardRAM */ 289 ACCW(PR_CTX2_R, ((0x00000000 & 0xfffff000) | 0x00000002)); 290 /* DMA access type is READ_AND_WRITE; 291 * memory starts at start of cardRAM (b12-31): 292 * It's adress needs to be at a 4kb boundary! */ 293 ACCW(PR_CTX3_R, 0x00000002); /* unknown (looks like this is rubbish/not needed?) */ 294 /* setup set '0' for cmd NV_ROP5_SOLID */ 295 ACCW(PR_CTX0_0, 0x02080043); /* NVclass $043, patchcfg ROP_AND, nv10+: little endian */ 296 ACCW(PR_CTX1_0, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 297 ACCW(PR_CTX2_0, 0x00000000); /* DMA0 and DMA1 instance invalid */ 298 ACCW(PR_CTX3_0, 0x00000000); /* method traps disabled */ 299 ACCW(PR_CTX0_1, 0x00000000); /* extra */ 300 ACCW(PR_CTX1_1, 0x00000000); /* extra */ 301 /* setup set '1' for cmd NV_IMAGE_BLACK_RECTANGLE */ 302 ACCW(PR_CTX0_2, 0x02080019); /* NVclass $019, patchcfg ROP_AND, nv10+: little endian */ 303 ACCW(PR_CTX1_2, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 304 ACCW(PR_CTX2_2, 0x00000000); /* DMA0 and DMA1 instance invalid */ 305 ACCW(PR_CTX3_2, 0x00000000); /* method traps disabled */ 306 ACCW(PR_CTX0_3, 0x00000000); /* extra */ 307 ACCW(PR_CTX1_3, 0x00000000); /* extra */ 308 /* setup set '2' for cmd NV_IMAGE_PATTERN */ 309 ACCW(PR_CTX0_4, 0x02080018); /* NVclass $018, patchcfg ROP_AND, nv10+: little endian */ 310 ACCW(PR_CTX1_4, 0x02000000); /* colorspace not set, notify instance is $0200 (b16-31) */ 311 ACCW(PR_CTX2_4, 0x00000000); /* DMA0 and DMA1 instance invalid */ 312 ACCW(PR_CTX3_4, 0x00000000); /* method traps disabled */ 313 ACCW(PR_CTX0_5, 0x00000000); /* extra */ 314 ACCW(PR_CTX1_5, 0x00000000); /* extra */ 315 /* setup set '4' for cmd NV_IMAGE_BLIT */ 316 ACCW(PR_CTX0_6, 0x0208005f); /* NVclass $05f, patchcfg ROP_AND, nv10+: little endian */ 317 ACCW(PR_CTX1_6, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 318 ACCW(PR_CTX2_6, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */ 319 ACCW(PR_CTX3_6, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */ 320 ACCW(PR_CTX0_7, 0x00000000); /* extra */ 321 ACCW(PR_CTX1_7, 0x00000000); /* extra */ 322 /* setup set '5' for cmd NV4_GDI_RECTANGLE_TEXT */ 323 ACCW(PR_CTX0_8, 0x0208004a); /* NVclass $04a, patchcfg ROP_AND, nv10+: little endian */ 324 ACCW(PR_CTX1_8, 0x02000000); /* colorspace not set, notify instance is $0200 (b16-31) */ 325 ACCW(PR_CTX2_8, 0x00000000); /* DMA0 and DMA1 instance invalid */ 326 ACCW(PR_CTX3_8, 0x00000000); /* method traps disabled */ 327 ACCW(PR_CTX0_9, 0x00000000); /* extra */ 328 ACCW(PR_CTX1_9, 0x00000000); /* extra */ 329 /* setup set '6' for cmd NV10_CONTEXT_SURFACES_2D */ 330 ACCW(PR_CTX0_A, 0x02080062); /* NVclass $062, nv10+: little endian */ 331 ACCW(PR_CTX1_A, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 332 ACCW(PR_CTX2_A, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */ 333 ACCW(PR_CTX3_A, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */ 334 ACCW(PR_CTX0_B, 0x00000000); /* extra */ 335 ACCW(PR_CTX1_B, 0x00000000); /* extra */ 336 /* setup set '7' for cmd NV_SCALED_IMAGE_FROM_MEMORY */ 337 ACCW(PR_CTX0_C, 0x02080077); /* NVclass $077, nv10+: little endian */ 338 ACCW(PR_CTX1_C, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 339 ACCW(PR_CTX2_C, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */ 340 ACCW(PR_CTX3_C, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */ 341 ACCW(PR_CTX0_D, 0x00000000); /* extra */ 342 ACCW(PR_CTX1_D, 0x00000000); /* extra */ 343 /* setup DMA set pointed at by PF_CACH1_DMAI */ 344 ACCW(PR_CTX0_E, 0x00003002); /* DMA page table present and of linear type; 345 * DMA class is $002 (b0-11); 346 * DMA target node is NVM (non-volatile memory?) 347 * (instead of doing PCI or AGP transfers) */ 348 ACCW(PR_CTX1_E, 0x00007fff); /* DMA limit: tablesize is 32k bytes */ 349 ACCW(PR_CTX2_E, (((si->ps.memory_size - 1) & 0xffff8000) | 0x00000002)); 350 /* DMA access type is READ_AND_WRITE; 351 * table is located at end of cardRAM (b12-31): 352 * It's adress needs to be at a 4kb boundary! */ 353 } 354 else 355 { 356 /* setup a DMA define for use by command defines below. */ 357 ACCW(PR_CTX0_R, 0x00003000); /* DMA page table present and of linear type; 358 * DMA target node is NVM (non-volatile memory?) 359 * (instead of doing PCI or AGP transfers) */ 360 ACCW(PR_CTX1_R, (si->ps.memory_size - 1)); /* DMA limit: size is all cardRAM */ 361 ACCW(PR_CTX2_R, ((0x00000000 & 0xfffff000) | 0x00000002)); 362 /* DMA access type is READ_AND_WRITE; 363 * memory starts at start of cardRAM (b12-31): 364 * It's adress needs to be at a 4kb boundary! */ 365 ACCW(PR_CTX3_R, 0x00000002); /* unknown (looks like this is rubbish/not needed?) */ 366 /* setup set '0' for cmd NV_ROP5_SOLID */ 367 ACCW(PR_CTX0_0, 0x01008043); /* NVclass $043, patchcfg ROP_AND, nv10+: little endian */ 368 ACCW(PR_CTX1_0, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 369 ACCW(PR_CTX2_0, 0x00000000); /* DMA0 and DMA1 instance invalid */ 370 ACCW(PR_CTX3_0, 0x00000000); /* method traps disabled */ 371 /* setup set '1' for cmd NV_IMAGE_BLACK_RECTANGLE */ 372 ACCW(PR_CTX0_1, 0x01008019); /* NVclass $019, patchcfg ROP_AND, nv10+: little endian */ 373 ACCW(PR_CTX1_1, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 374 ACCW(PR_CTX2_1, 0x00000000); /* DMA0 and DMA1 instance invalid */ 375 ACCW(PR_CTX3_1, 0x00000000); /* method traps disabled */ 376 /* setup set '2' for cmd NV_IMAGE_PATTERN */ 377 ACCW(PR_CTX0_2, 0x01008018); /* NVclass $018, patchcfg ROP_AND, nv10+: little endian */ 378 ACCW(PR_CTX1_2, 0x00000002); /* colorspace not set, notify instance is $0200 (b16-31) */ 379 ACCW(PR_CTX2_2, 0x00000000); /* DMA0 and DMA1 instance invalid */ 380 ACCW(PR_CTX3_2, 0x00000000); /* method traps disabled */ 381 /* setup set '3' for ... */ 382 if(si->ps.card_arch >= NV10A) 383 { 384 /* ... cmd NV10_CONTEXT_SURFACES_2D */ 385 ACCW(PR_CTX0_3, 0x01008062); /* NVclass $062, nv10+: little endian */ 386 } 387 else 388 { 389 /* ... cmd NV4_SURFACE */ 390 ACCW(PR_CTX0_3, 0x01008042); /* NVclass $042, nv10+: little endian */ 391 } 392 ACCW(PR_CTX1_3, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 393 ACCW(PR_CTX2_3, 0x11401140); /* DMA0 instance is $1140, DMA1 instance invalid */ 394 ACCW(PR_CTX3_3, 0x00000000); /* method trap 0 is $1140, trap 1 disabled */ 395 /* setup set '4' for cmd NV_IMAGE_BLIT */ 396 ACCW(PR_CTX0_4, 0x0100805f); /* NVclass $05f, patchcfg ROP_AND, nv10+: little endian */ 397 ACCW(PR_CTX1_4, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 398 ACCW(PR_CTX2_4, 0x11401140); /* DMA0 instance is $1140, DMA1 instance invalid */ 399 ACCW(PR_CTX3_4, 0x00000000); /* method trap 0 is $1140, trap 1 disabled */ 400 /* setup set '5' for cmd NV4_GDI_RECTANGLE_TEXT */ 401 ACCW(PR_CTX0_5, 0x0100804a); /* NVclass $04a, patchcfg ROP_AND, nv10+: little endian */ 402 ACCW(PR_CTX1_5, 0x00000002); /* colorspace not set, notify instance is $0200 (b16-31) */ 403 ACCW(PR_CTX2_5, 0x00000000); /* DMA0 and DMA1 instance invalid */ 404 ACCW(PR_CTX3_5, 0x00000000); /* method traps disabled */ 405 /* setup set '6' ... */ 406 if (si->ps.card_arch >= NV10A) 407 { 408 /* ... for cmd NV10_CONTEXT_SURFACES_ARGB_ZS */ 409 ACCW(PR_CTX0_6, 0x00000093); /* NVclass $093, nv10+: little endian */ 410 } 411 else 412 { 413 /* ... for cmd NV4_CONTEXT_SURFACES_ARGB_ZS */ 414 ACCW(PR_CTX0_6, 0x00000053); /* NVclass $053, nv10+: little endian */ 415 } 416 ACCW(PR_CTX1_6, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 417 ACCW(PR_CTX2_6, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 418 ACCW(PR_CTX3_6, 0x00000000); /* method traps disabled */ 419 /* setup set '7' ... */ 420 if (si->ps.card_arch >= NV10A) 421 { 422 /* ... for cmd NV10_DX5_TEXTURE_TRIANGLE */ 423 ACCW(PR_CTX0_7, 0x0300a094); /* NVclass $094, patchcfg ROP_AND, userclip enable, 424 * context surface0 valid, nv10+: little endian */ 425 } 426 else 427 { 428 /* ... for cmd NV4_DX5_TEXTURE_TRIANGLE */ 429 ACCW(PR_CTX0_7, 0x0300a054); /* NVclass $054, patchcfg ROP_AND, userclip enable, 430 * context surface0 valid */ 431 } 432 ACCW(PR_CTX1_7, 0x00000d01); /* format is A8RGB24, MSB mono */ 433 ACCW(PR_CTX2_7, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 434 ACCW(PR_CTX3_7, 0x00000000); /* method traps disabled */ 435 /* setup set '8' ... */ 436 if (si->ps.card_arch >= NV10A) 437 { 438 /* ... for cmd NV10_DX6_MULTI_TEXTURE_TRIANGLE (not used) */ 439 ACCW(PR_CTX0_8, 0x0300a095); /* NVclass $095, patchcfg ROP_AND, userclip enable, 440 * context surface0 valid, nv10+: little endian */ 441 } 442 else 443 { 444 /* ... for cmd NV4_DX6_MULTI_TEXTURE_TRIANGLE (not used) */ 445 ACCW(PR_CTX0_8, 0x0300a055); /* NVclass $055, patchcfg ROP_AND, userclip enable, 446 * context surface0 valid */ 447 } 448 ACCW(PR_CTX1_8, 0x00000d01); /* format is A8RGB24, MSB mono */ 449 ACCW(PR_CTX2_8, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 450 ACCW(PR_CTX3_8, 0x00000000); /* method traps disabled */ 451 /* setup set '9' for cmd NV_SCALED_IMAGE_FROM_MEMORY */ 452 ACCW(PR_CTX0_9, 0x01018077); /* NVclass $077, patchcfg SRC_COPY, 453 * context surface0 valid, nv10+: little endian */ 454 ACCW(PR_CTX1_9, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 455 ACCW(PR_CTX2_9, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 456 ACCW(PR_CTX3_9, 0x00000000); /* method traps disabled */ 457 /* setup set 'A' for cmd NV1_RENDER_SOLID_LIN (not used) */ 458 ACCW(PR_CTX0_A, 0x0300a01c); /* NVclass $01c, patchcfg ROP_AND, userclip enable, 459 * context surface0 valid, nv10+: little endian */ 460 ACCW(PR_CTX1_A, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 461 ACCW(PR_CTX2_A, 0x11401140); /* DMA0, DMA1 instance = $1140 */ 462 ACCW(PR_CTX3_A, 0x00000000); /* method traps disabled */ 463 /* setup DMA set pointed at by PF_CACH1_DMAI */ 464 if (si->engine.agp_mode) 465 { 466 /* DMA page table present and of linear type; 467 * DMA class is $002 (b0-11); 468 * DMA target node is AGP */ 469 ACCW(PR_CTX0_B, 0x00033002); 470 } 471 else 472 { 473 /* DMA page table present and of linear type; 474 * DMA class is $002 (b0-11); 475 * DMA target node is PCI */ 476 ACCW(PR_CTX0_B, 0x00023002); 477 } 478 ACCW(PR_CTX1_B, 0x000fffff); /* DMA limit: tablesize is 1M bytes */ 479 ACCW(PR_CTX2_B, (((uint32)((uint8 *)(si->dma_buffer_pci))) | 0x00000002)); 480 /* DMA access type is READ_AND_WRITE; 481 * table is located in main system RAM (b12-31): 482 * It's adress needs to be at a 4kb boundary! */ 483 484 //3D stuff: 485 /* 486 rud's (temp.) notes: 487 (problem: 3D driver renders in 32bit whatever the frontbuffer space in DMA mode.) 488 - the colorspace dependant info under 'acc engine' also sets the outcome for the 489 3D add-on. I don't know yet if the 3D render funcs render in the frontbuffer 490 space and the back-to-front blit isn't set (stays in 32bit!) (likely), 491 or if the 3D funcs render always in 32bit space and back-to-front blit color- 492 space converts... I'll try to nail this down at some point. 493 - the colorspace dependant info under 'pramin' is needed to get the 3D related 494 surface commands up and running. An alternate solution would probably be calling 495 the surface command with the colorspace set. 496 */ 497 switch(si->dm.space) 498 { 499 case B_CMAP8: 500 /* acc engine */ 501 ACCW(FORMATS, 0x00001010); 502 if (si->ps.card_arch < NV30A) 503 /* set depth 0-5: $1 = Y8 */ 504 ACCW(BPIXEL, 0x00111111); 505 else 506 /* set depth 0-1: $1 = Y8, $2 = X1R5G5B5_Z1R5G5B5 */ 507 ACCW(BPIXEL, 0x00000021); 508 ACCW(STRD_FMT, 0x03020202); 509 /* PRAMIN */ 510 if (si->ps.card_arch == NV04A) 511 ACCW(PR_CTX1_6, 0x00000302); /* format is X24Y8, LSB mono */ 512 else 513 ACCW(PR_CTX1_6, 0x00000000); /* format is invalid */ 514 ACCW(PR_CTX1_A, 0x00000302); /* format is X24Y8, LSB mono */ 515 break; 516 case B_RGB15_LITTLE: 517 /* acc engine */ 518 ACCW(FORMATS, 0x00002071); 519 if (si->ps.card_arch < NV30A) 520 /* set depth 0-5: $2 = X1R5G5B5_Z1R5G5B5, $6 = Y16 */ 521 ACCW(BPIXEL, 0x00226222); 522 else 523 /* set depth 0-1: $2 = X1R5G5B5_Z1R5G5B5, $4 = A1R5G5B5 */ 524 ACCW(BPIXEL, 0x00000042); 525 ACCW(STRD_FMT, 0x09080808); 526 /* PRAMIN */ 527 ACCW(PR_CTX1_6, 0x00000902); /* format is X17RGB15, LSB mono */ 528 ACCW(PR_CTX1_A, 0x00000902); /* format is X17RGB15, LSB mono */ 529 break; 530 case B_RGB16_LITTLE: 531 /* acc engine */ 532 ACCW(FORMATS, 0x000050C2); 533 if (si->ps.card_arch < NV30A) 534 /* set depth 0-5: $5 = R5G6B5, $6 = Y16 */ 535 ACCW(BPIXEL, 0x00556555); 536 else 537 /* set depth 0-1: $5 = R5G6B5, $a = X1A7R8G8B8_O1A7R8G8B8 */ 538 ACCW(BPIXEL, 0x000000a5); 539 if (si->ps.card_arch == NV04A) 540 ACCW(STRD_FMT, 0x0c0b0b0b); 541 else 542 ACCW(STRD_FMT, 0x000b0b0c); 543 /* PRAMIN */ 544 ACCW(PR_CTX1_6, 0x00000c02); /* format is X16RGB16, LSB mono */ 545 ACCW(PR_CTX1_A, 0x00000c02); /* format is X16RGB16, LSB mono */ 546 break; 547 case B_RGB32_LITTLE: 548 case B_RGBA32_LITTLE: 549 /* acc engine */ 550 ACCW(FORMATS, 0x000070e5); 551 if (si->ps.card_arch < NV30A) 552 /* set depth 0-5: $7 = X8R8G8B8_Z8R8G8B8, $d = Y32 */ 553 ACCW(BPIXEL, 0x0077d777); 554 else 555 /* set depth 0-1: $7 = X8R8G8B8_Z8R8G8B8, $e = V8YB8U8YA8 */ 556 ACCW(BPIXEL, 0x000000e7); 557 ACCW(STRD_FMT, 0x0e0d0d0d); 558 /* PRAMIN */ 559 ACCW(PR_CTX1_6, 0x00000e02); /* format is X8RGB24, LSB mono */ 560 ACCW(PR_CTX1_A, 0x00000e02); /* format is X8RGB24, LSB mono */ 561 break; 562 default: 563 LOG(8,("ACC: init, invalid bit depth\n")); 564 return B_ERROR; 565 } 566 //end 3D stuff. 567 } 568 569 if (si->ps.card_arch == NV04A) 570 { 571 /* do a explicit engine reset */ 572 ACCW(DEBUG0, 0x000001ff); 573 574 /* init some function blocks */ 575 ACCW(DEBUG0, 0x1230c000); 576 ACCW(DEBUG1, 0x72111101); 577 ACCW(DEBUG2, 0x11d5f071); 578 ACCW(DEBUG3, 0x0004ff31); 579 /* init OP methods */ 580 ACCW(DEBUG3, 0x4004ff31); 581 582 /* disable all acceleration engine INT reguests */ 583 ACCW(ACC_INTE, 0x00000000); 584 /* reset all acceration engine INT status bits */ 585 ACCW(ACC_INTS, 0xffffffff); 586 /* context control enabled */ 587 ACCW(NV04_CTX_CTRL, 0x10010100); 588 /* all acceleration buffers, pitches and colors are valid */ 589 ACCW(NV04_ACC_STAT, 0xffffffff); 590 /* enable acceleration engine command FIFO */ 591 ACCW(FIFO_EN, 0x00000001); 592 593 /* setup location of active screen in framebuffer */ 594 ACCW(OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 595 ACCW(OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 596 /* setup accesible card memory range */ 597 ACCW(BLIMIT0, (si->ps.memory_size - 1)); 598 ACCW(BLIMIT1, (si->ps.memory_size - 1)); 599 600 /* pattern shape value = 8x8, 2 color */ 601 //fixme: not needed, unless the engine has a hardware fault (setting via cmd)! 602 //ACCW(PAT_SHP, 0x00000000); 603 /* Pgraph Beta AND value (fraction) b23-30 */ 604 ACCW(BETA_AND_VAL, 0xffffffff); 605 } 606 else 607 { 608 /* do a explicit engine reset */ 609 ACCW(DEBUG0, 0xffffffff); 610 ACCW(DEBUG0, 0x00000000); 611 /* disable all acceleration engine INT reguests */ 612 ACCW(ACC_INTE, 0x00000000); 613 /* reset all acceration engine INT status bits */ 614 ACCW(ACC_INTS, 0xffffffff); 615 /* context control enabled */ 616 ACCW(NV10_CTX_CTRL, 0x10010100); 617 /* all acceleration buffers, pitches and colors are valid */ 618 ACCW(NV10_ACC_STAT, 0xffffffff); 619 /* enable acceleration engine command FIFO */ 620 ACCW(FIFO_EN, 0x00000001); 621 /* setup surface type: 622 * b1-0 = %01 = surface type is non-swizzle; 623 * this is needed to enable 3D on NV1x (confirmed) and maybe others? */ 624 ACCW(NV10_SURF_TYP, ((ACCR(NV10_SURF_TYP)) & 0x0007ff00)); 625 ACCW(NV10_SURF_TYP, ((ACCR(NV10_SURF_TYP)) | 0x00020101)); 626 } 627 628 if (si->ps.card_arch == NV10A) 629 { 630 /* init some function blocks */ 631 ACCW(DEBUG1, 0x00118700); 632 /* DEBUG2 has a big influence on 3D speed for NV15 (confirmed) */ 633 ACCW(DEBUG2, 0x24f82ad9); 634 ACCW(DEBUG3, 0x55de0030); 635 636 /* copy tile setup stuff from 'source' to acc engine */ 637 for (cnt = 0; cnt < 32; cnt++) 638 { 639 NV_REG32(NVACC_NV10_TIL0AD + (cnt << 2)) = 640 NV_REG32(NVACC_NV10_FBTIL0AD + (cnt << 2)); 641 } 642 643 /* setup location of active screen in framebuffer */ 644 ACCW(OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 645 ACCW(OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 646 /* setup accesible card memory range */ 647 ACCW(BLIMIT0, (si->ps.memory_size - 1)); 648 ACCW(BLIMIT1, (si->ps.memory_size - 1)); 649 650 /* pattern shape value = 8x8, 2 color */ 651 //fixme: not needed, unless the engine has a hardware fault (setting via cmd)! 652 //ACCW(PAT_SHP, 0x00000000); 653 /* Pgraph Beta AND value (fraction) b23-30 */ 654 ACCW(BETA_AND_VAL, 0xffffffff); 655 } 656 657 if (si->ps.card_arch >= NV20A) 658 { 659 switch (si->ps.card_arch) 660 { 661 case NV40A: 662 /* init some function blocks */ 663 ACCW(DEBUG1, 0x401287c0); 664 ACCW(DEBUG3, 0x60de8051); 665 /* disable specific functions, but enable SETUP_SPARE2 register */ 666 ACCW(NV10_DEBUG4, 0x00008000); 667 /* set limit_viol_pix_adress(?): more likely something unknown.. */ 668 ACCW(NV25_WHAT0, 0x00be3c5f); 669 670 /* setup some unknown serially accessed registers (?) */ 671 tmp = (NV_REG32(NV32_NV4X_WHAT0) & 0x000000ff); 672 for (cnt = 0; (tmp && !(tmp & 0x00000001)); tmp >>= 1, cnt++); 673 { 674 ACCW(NV4X_WHAT2, cnt); 675 } 676 677 /* unknown.. */ 678 switch (si->ps.card_type) 679 { 680 case NV40: 681 case NV45: 682 /* and NV48: but these are pgm'd as NV45 currently */ 683 ACCW(NV40_WHAT0, 0x83280fff); 684 ACCW(NV40_WHAT1, 0x000000a0); 685 ACCW(NV40_WHAT2, 0x0078e366); 686 ACCW(NV40_WHAT3, 0x0000014c); 687 break; 688 case NV41: 689 /* and ID == 0x012x: but no cards defined yet */ 690 ACCW(NV40P_WHAT0, 0x83280eff); 691 ACCW(NV40P_WHAT1, 0x000000a0); 692 ACCW(NV40P_WHAT2, 0x007596ff); 693 ACCW(NV40P_WHAT3, 0x00000108); 694 break; 695 case NV43: 696 ACCW(NV40P_WHAT0, 0x83280eff); 697 ACCW(NV40P_WHAT1, 0x000000a0); 698 ACCW(NV40P_WHAT2, 0x0072cb77); 699 ACCW(NV40P_WHAT3, 0x00000108); 700 break; 701 case NV44: 702 /* and ID == 0x01dx: but no cards defined yet */ 703 ACCW(NV40P_WHAT0, 0x83280eff); 704 ACCW(NV40P_WHAT1, 0x000000a0); 705 706 NV_REG32(NV32_NV44_WHAT10) = NV_REG32(NV32_NV10STRAPINFO); 707 NV_REG32(NV32_NV44_WHAT11) = 0x00000000; 708 NV_REG32(NV32_NV44_WHAT12) = 0x00000000; 709 NV_REG32(NV32_NV44_WHAT13) = NV_REG32(NV32_NV10STRAPINFO); 710 711 ACCW(NV44_WHAT2, 0x00000000); 712 ACCW(NV44_WHAT3, 0x00000000); 713 break; 714 /* case NV44 type 2: 715 //fixme if needed: doesn't seem to need the strapinfo thing.. 716 ACCW(NV40P_WHAT0, 0x83280eff); 717 ACCW(NV40P_WHAT1, 0x000000a0); 718 719 ACCW(NV44_WHAT2, 0x00000000); 720 ACCW(NV44_WHAT3, 0x00000000); 721 break; 722 */ case NV47: 723 /* and ID == 0x029x: but no cards defined yet */ 724 ACCW(NV40P_WHAT0, 0x83280eff); 725 ACCW(NV40P_WHAT1, 0x000000a0); 726 ACCW(NV40P_WHAT2, 0x07830610); 727 ACCW(NV40P_WHAT3, 0x0000016a); 728 break; 729 default: 730 ACCW(NV40P_WHAT0, 0x83280eff); 731 ACCW(NV40P_WHAT1, 0x000000a0); 732 break; 733 } 734 735 ACCW(NV10_TIL3PT, 0x2ffff800); 736 ACCW(NV10_TIL3ST, 0x00006000); 737 ACCW(NV4X_WHAT1, 0x01000000); 738 /* engine data source DMA instance = $1140 */ 739 ACCW(NV4X_DMA_SRC, 0x00001140); 740 break; 741 case NV30A: 742 /* init some function blocks, but most is unknown.. */ 743 ACCW(DEBUG1, 0x40108700); 744 ACCW(NV25_WHAT1, 0x00140000); 745 ACCW(DEBUG3, 0xf00e0431); 746 ACCW(NV10_DEBUG4, 0x00008000); 747 ACCW(NV25_WHAT0, 0xf04b1f36); 748 ACCW(NV20_WHAT3, 0x1002d888); 749 ACCW(NV25_WHAT2, 0x62ff007f); 750 break; 751 case NV20A: 752 /* init some function blocks, but most is unknown.. */ 753 ACCW(DEBUG1, 0x00118700); 754 ACCW(DEBUG3, 0xf20e0431); 755 ACCW(NV10_DEBUG4, 0x00000000); 756 ACCW(NV20_WHAT1, 0x00000040); 757 if (si->ps.card_type < NV25) 758 { 759 ACCW(NV20_WHAT2, 0x00080000); 760 ACCW(NV10_DEBUG5, 0x00000005); 761 ACCW(NV20_WHAT3, 0x45caa208); 762 ACCW(NV20_WHAT4, 0x24000000); 763 ACCW(NV20_WHAT5, 0x00000040); 764 765 /* copy some fixed RAM(?) configuration info(?) to some indexed registers: */ 766 /* b16-24 is select; b2-13 is adress in 32-bit words */ 767 ACCW(RDI_INDEX, 0x00e00038); 768 /* data is 32-bit */ 769 ACCW(RDI_DATA, 0x00000030); 770 /* copy some fixed RAM(?) configuration info(?) to some indexed registers: */ 771 /* b16-24 is select; b2-13 is adress in 32-bit words */ 772 ACCW(RDI_INDEX, 0x00e10038); 773 /* data is 32-bit */ 774 ACCW(RDI_DATA, 0x00000030); 775 } 776 else 777 { 778 ACCW(NV25_WHAT1, 0x00080000); 779 ACCW(NV25_WHAT0, 0x304b1fb6); 780 ACCW(NV20_WHAT3, 0x18b82880); 781 ACCW(NV20_WHAT4, 0x44000000); 782 ACCW(NV20_WHAT5, 0x40000080); 783 ACCW(NV25_WHAT2, 0x000000ff); 784 } 785 break; 786 } 787 788 /* NV20A, NV30A and NV40A: */ 789 /* copy tile setup stuff from previous setup 'source' to acc engine 790 * (pattern colorRAM?) */ 791 if ((si->ps.card_type <= NV40) || (si->ps.card_type == NV45)) 792 { 793 for (cnt = 0; cnt < 32; cnt++) 794 { 795 /* copy NV10_FBTIL0AD upto/including NV10_FBTIL7ST */ 796 NV_REG32(NVACC_NV20_WHAT0 + (cnt << 2)) = 797 NV_REG32(NVACC_NV10_FBTIL0AD + (cnt << 2)); 798 799 /* copy NV10_FBTIL0AD upto/including NV10_FBTIL7ST */ 800 NV_REG32(NVACC_NV20_2_WHAT0 + (cnt << 2)) = 801 NV_REG32(NVACC_NV10_FBTIL0AD + (cnt << 2)); 802 } 803 } 804 else 805 { 806 /* NV41, 43, 44, 47 */ 807 if (si->ps.card_type == NV47) 808 /* or ID == 0x01dx or ID == 0x029x: but no cards defined yet */ 809 { 810 for (cnt = 0; cnt < 60; cnt++) 811 { 812 /* copy NV41_FBTIL0AD upto/including NV47_FBTILEST */ 813 NV_REG32(NVACC_NV41_WHAT0 + (cnt << 2)) = 814 NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2)); 815 816 /* copy NV41_FBTIL0AD upto/including NV47_FBTILEST */ 817 NV_REG32(NVACC_NV20_2_WHAT0 + (cnt << 2)) = 818 NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2)); 819 } 820 } 821 else 822 { 823 /* NV41, 43, 44 */ 824 for (cnt = 0; cnt < 48; cnt++) 825 { 826 /* copy NV41_FBTIL0AD upto/including NV41_FBTILBST */ 827 NV_REG32(NVACC_NV20_WHAT0 + (cnt << 2)) = 828 NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2)); 829 830 if (si->ps.card_type != NV44) 831 { 832 /* copy NV41_FBTIL0AD upto/including NV41_FBTILBST */ 833 NV_REG32(NVACC_NV20_2_WHAT0 + (cnt << 2)) = 834 NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2)); 835 } 836 } 837 } 838 } 839 840 if (si->ps.card_arch >= NV40A) 841 { 842 if ((si->ps.card_type == NV40) || (si->ps.card_type == NV45)) 843 { 844 /* copy some RAM configuration info(?) */ 845 ACCW(NV20_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); 846 ACCW(NV20_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); 847 ACCW(NV40_WHAT_T2, NV_REG32(NV32_PFB_CONFIG_0)); 848 ACCW(NV40_WHAT_T3, NV_REG32(NV32_PFB_CONFIG_1)); 849 850 /* setup location of active screen in framebuffer */ 851 ACCW(NV20_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 852 ACCW(NV20_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 853 /* setup accesible card memory range */ 854 ACCW(NV20_BLIMIT6, (si->ps.memory_size - 1)); 855 ACCW(NV20_BLIMIT7, (si->ps.memory_size - 1)); 856 } 857 else 858 { 859 /* NV41, 43, 44, 47 */ 860 861 /* copy some RAM configuration info(?) */ 862 if (si->ps.card_type == NV47) 863 /* or ID == 0x01dx or ID == 0x029x: but no cards defined yet */ 864 { 865 ACCW(NV47_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); 866 ACCW(NV47_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); 867 } 868 else 869 { 870 /* NV41, 43, 44 */ 871 ACCW(NV40P_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); 872 ACCW(NV40P_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); 873 } 874 ACCW(NV40P_WHAT_T2, NV_REG32(NV32_PFB_CONFIG_0)); 875 ACCW(NV40P_WHAT_T3, NV_REG32(NV32_PFB_CONFIG_1)); 876 877 /* setup location of active screen in framebuffer */ 878 ACCW(NV40P_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 879 ACCW(NV40P_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 880 /* setup accesible card memory range */ 881 ACCW(NV40P_BLIMIT6, (si->ps.memory_size - 1)); 882 ACCW(NV40P_BLIMIT7, (si->ps.memory_size - 1)); 883 } 884 } 885 else /* NV20A and NV30A: */ 886 { 887 /* copy some RAM configuration info(?) */ 888 ACCW(NV20_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); 889 ACCW(NV20_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); 890 /* copy some RAM configuration info(?) to some indexed registers: */ 891 /* b16-24 is select; b2-13 is adress in 32-bit words */ 892 ACCW(RDI_INDEX, 0x00ea0000); 893 /* data is 32-bit */ 894 ACCW(RDI_DATA, NV_REG32(NV32_PFB_CONFIG_0)); 895 /* b16-24 is select; b2-13 is adress in 32-bit words */ 896 ACCW(RDI_INDEX, 0x00ea0004); 897 /* data is 32-bit */ 898 ACCW(RDI_DATA, NV_REG32(NV32_PFB_CONFIG_1)); 899 900 /* setup location of active screen in framebuffer */ 901 ACCW(NV20_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 902 ACCW(NV20_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 903 /* setup accesible card memory range */ 904 ACCW(NV20_BLIMIT6, (si->ps.memory_size - 1)); 905 ACCW(NV20_BLIMIT7, (si->ps.memory_size - 1)); 906 } 907 908 /* NV20A, NV30A and NV40A: */ 909 /* setup some acc engine tile stuff */ 910 ACCW(NV10_TIL2AD, 0x00000000); 911 ACCW(NV10_TIL0ED, 0xffffffff); 912 } 913 914 /* all cards: */ 915 /* setup clipping: rect size is 32768 x 32768, probably max. setting */ 916 /* note: 917 * can also be done via the NV_IMAGE_BLACK_RECTANGLE engine command. */ 918 ACCW(ABS_UCLP_XMIN, 0x00000000); 919 ACCW(ABS_UCLP_YMIN, 0x00000000); 920 ACCW(ABS_UCLP_XMAX, 0x00007fff); 921 ACCW(ABS_UCLP_YMAX, 0x00007fff); 922 923 /*** PFIFO ***/ 924 /* (setup caches) */ 925 /* disable caches reassign */ 926 ACCW(PF_CACHES, 0x00000000); 927 /* PFIFO mode: channel 0 is in DMA mode, channels 1 - 32 are in PIO mode */ 928 ACCW(PF_MODE, 0x00000001); 929 /* cache1 push0 access disabled */ 930 ACCW(PF_CACH1_PSH0, 0x00000000); 931 /* cache1 pull0 access disabled */ 932 ACCW(PF_CACH1_PUL0, 0x00000000); 933 /* cache1 push1 mode = DMA */ 934 if (si->ps.card_arch >= NV40A) 935 ACCW(PF_CACH1_PSH1, 0x00010000); 936 else 937 ACCW(PF_CACH1_PSH1, 0x00000100); 938 /* cache1 DMA Put offset = 0 (b2-28) */ 939 ACCW(PF_CACH1_DMAP, 0x00000000); 940 /* cache1 DMA Get offset = 0 (b2-28) */ 941 ACCW(PF_CACH1_DMAG, 0x00000000); 942 /* cache1 DMA instance adress = $114e (b0-15); 943 * instance being b4-19 with baseadress NV_PRAMIN_CTX_0 (0x00700000). */ 944 /* note: 945 * should point to a DMA definition in CTX register space (which is sort of RAM). 946 * This define tells the engine where the DMA cmd buffer is and what it's size is. 947 * Inside that cmd buffer you'll find the actual issued engine commands. */ 948 if (si->ps.card_arch >= NV40A) 949 ACCW(PF_CACH1_DMAI, 0x00001150); 950 else 951 ACCW(PF_CACH1_DMAI, 0x0000114d); 952 /* cache0 push0 access disabled */ 953 ACCW(PF_CACH0_PSH0, 0x00000000); 954 /* cache0 pull0 access disabled */ 955 ACCW(PF_CACH0_PUL0, 0x00000000); 956 /* RAM HT (hash table) baseadress = $10000 (b4-8), size = 4k, 957 * search = 128 (is byte offset between hash 'sets') */ 958 /* note: 959 * so HT base is $00710000, last is $00710fff. 960 * In this space you define the engine command handles (HT_HANDL_XX), which 961 * in turn points to the defines in CTX register space (which is sort of RAM) */ 962 ACCW(PF_RAMHT, 0x03000100); 963 /* RAM FC baseadress = $11000 (b3-8) (size is fixed to 0.5k(?)) */ 964 /* note: 965 * so FC base is $00711000, last is $007111ff. (not used?) */ 966 ACCW(PF_RAMFC, 0x00000110); 967 /* RAM RO baseadress = $11200 (b1-8), size = 0.5k */ 968 /* note: 969 * so RO base is $00711200, last is $007113ff. (not used?) */ 970 /* note also: 971 * This means(?) the PRAMIN CTX registers are accessible from base $00711400. */ 972 ACCW(PF_RAMRO, 0x00000112); 973 /* PFIFO size: ch0-15 = 512 bytes, ch16-31 = 124 bytes */ 974 ACCW(PF_SIZE, 0x0000ffff); 975 /* cache1 hash instance = $ffff (b0-15) */ 976 ACCW(PF_CACH1_HASH, 0x0000ffff); 977 /* disable all PFIFO INTs */ 978 ACCW(PF_INTEN, 0x00000000); 979 /* reset all PFIFO INT status bits */ 980 ACCW(PF_INTSTAT, 0xffffffff); 981 /* cache0 pull0 engine = acceleration engine (graphics) */ 982 ACCW(PF_CACH0_PUL1, 0x00000001); 983 /* cache1 DMA control: disable some stuff */ 984 ACCW(PF_CACH1_DMAC, 0x00000000); 985 /* cache1 engine 0 upto/including 7 is software (could also be graphics or DVD) */ 986 ACCW(PF_CACH1_ENG, 0x00000000); 987 /* cache1 DMA fetch: trigger at 128 bytes, size is 32 bytes, max requests is 15, 988 * use little endian */ 989 ACCW(PF_CACH1_DMAF, 0x000f0078); 990 /* cache1 DMA push: b0 = 1: access is enabled */ 991 ACCW(PF_CACH1_DMAS, 0x00000001); 992 /* cache1 push0 access enabled */ 993 ACCW(PF_CACH1_PSH0, 0x00000001); 994 /* cache1 pull0 access enabled */ 995 ACCW(PF_CACH1_PUL0, 0x00000001); 996 /* cache1 pull1 engine = acceleration engine (graphics) */ 997 ACCW(PF_CACH1_PUL1, 0x00000001); 998 /* enable PFIFO caches reassign */ 999 ACCW(PF_CACHES, 0x00000001); 1000 1001 /* setup 3D specifics */ 1002 nv_init_for_3D_dma(); 1003 1004 /*** init acceleration engine command info ***/ 1005 /* set object handles */ 1006 /* note: 1007 * probably depending on some other setup, there are 8 or 32 FIFO channels 1008 * available. Assuming the current setup only has 8 channels because the 'rest' 1009 * isn't setup here... */ 1010 si->engine.fifo.handle[0] = NV_ROP5_SOLID; 1011 si->engine.fifo.handle[1] = NV_IMAGE_BLACK_RECTANGLE; 1012 si->engine.fifo.handle[2] = NV_IMAGE_PATTERN; 1013 si->engine.fifo.handle[3] = NV4_SURFACE; /* NV10_CONTEXT_SURFACES_2D is identical */ 1014 si->engine.fifo.handle[4] = NV_IMAGE_BLIT; 1015 si->engine.fifo.handle[5] = NV4_GDI_RECTANGLE_TEXT; 1016 si->engine.fifo.handle[6] = NV4_CONTEXT_SURFACES_ARGB_ZS;//NV1_RENDER_SOLID_LIN; 1017 si->engine.fifo.handle[7] = NV4_DX5_TEXTURE_TRIANGLE; 1018 /* preset no FIFO channels assigned to cmd's */ 1019 for (cnt = 0; cnt < 0x20; cnt++) 1020 { 1021 si->engine.fifo.ch_ptr[cnt] = 0; 1022 } 1023 /* set handle's pointers to their assigned FIFO channels */ 1024 /* note: 1025 * b0-1 aren't used as adressbits. Using b0 to indicate a valid pointer. */ 1026 for (cnt = 0; cnt < 0x08; cnt++) 1027 { 1028 si->engine.fifo.ch_ptr[(si->engine.fifo.handle[cnt])] = 1029 (0x00000001 + (cnt * 0x00002000)); 1030 } 1031 1032 /*** init DMA command buffer info ***/ 1033 if (si->ps.card_arch >= NV40A) //main mem DMA buf on pre-NV40 1034 { 1035 si->dma_buffer = (void *)((char *)si->framebuffer + 1036 ((si->ps.memory_size - 1) & 0xffff8000)); 1037 } 1038 LOG(4,("ACC_DMA: command buffer is at adress $%08x\n", 1039 ((uint32)(si->dma_buffer)))); 1040 /* we have issued no DMA cmd's to the engine yet */ 1041 si->engine.dma.put = 0; 1042 /* the current first free adress in the DMA buffer is at offset 0 */ 1043 si->engine.dma.current = 0; 1044 /* the DMA buffer can hold 8k 32-bit words (it's 32kb in size), 1045 * or 256k 32-bit words (1Mb in size) dependant on architecture (for now) */ 1046 /* note: 1047 * one word is reserved at the end of the DMA buffer to be able to instruct the 1048 * engine to do a buffer wrap-around! 1049 * (DMA opcode 'noninc method': issue word $20000000.) */ 1050 if (si->ps.card_arch < NV40A) 1051 si->engine.dma.max = ((1 * 1024 * 1024) >> 2) - 1; 1052 else 1053 si->engine.dma.max = 8192 - 1; 1054 /* note the current free space we have left in the DMA buffer */ 1055 si->engine.dma.free = si->engine.dma.max - si->engine.dma.current; 1056 1057 /*** init FIFO via DMA command buffer. ***/ 1058 /* wait for room in fifo for new FIFO assigment cmds if needed: */ 1059 if (si->ps.card_arch >= NV40A) 1060 { 1061 if (nv_acc_fifofree_dma(12) != B_OK) return B_ERROR; 1062 } 1063 else 1064 { 1065 if (nv_acc_fifofree_dma(16) != B_OK) return B_ERROR; 1066 } 1067 1068 /* program new FIFO assignments */ 1069 /* Raster OPeration: */ 1070 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH0, si->engine.fifo.handle[0]); 1071 /* Clip: */ 1072 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH1, si->engine.fifo.handle[1]); 1073 /* Pattern: */ 1074 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH2, si->engine.fifo.handle[2]); 1075 /* 2D Surfaces: */ 1076 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH3, si->engine.fifo.handle[3]); 1077 /* Blit: */ 1078 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH4, si->engine.fifo.handle[4]); 1079 /* Bitmap: */ 1080 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH5, si->engine.fifo.handle[5]); 1081 if (si->ps.card_arch < NV40A) 1082 { 1083 /* 3D surfaces: (3D related only) */ 1084 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH6, si->engine.fifo.handle[6]); 1085 /* Textured Triangle: (3D only) */ 1086 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH7, si->engine.fifo.handle[7]); 1087 } 1088 1089 /*** Set pixel width ***/ 1090 switch(si->dm.space) 1091 { 1092 case B_CMAP8: 1093 surf_depth = 0x00000001; 1094 cmd_depth = 0x00000003; 1095 break; 1096 case B_RGB15_LITTLE: 1097 case B_RGB16_LITTLE: 1098 surf_depth = 0x00000004; 1099 cmd_depth = 0x00000001; 1100 break; 1101 case B_RGB32_LITTLE: 1102 case B_RGBA32_LITTLE: 1103 surf_depth = 0x00000006; 1104 cmd_depth = 0x00000003; 1105 break; 1106 default: 1107 LOG(8,("ACC_DMA: init, invalid bit depth\n")); 1108 return B_ERROR; 1109 } 1110 1111 /* wait for room in fifo for surface setup cmd if needed */ 1112 if (nv_acc_fifofree_dma(5) != B_OK) return B_ERROR; 1113 /* now setup 2D surface (writing 5 32bit words) */ 1114 nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 4); 1115 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = surf_depth; /* Format */ 1116 /* setup screen pitch */ 1117 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1118 ((si->fbc.bytes_per_row & 0x0000ffff) | (si->fbc.bytes_per_row << 16)); /* Pitch */ 1119 /* setup screen location */ 1120 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1121 ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer); /* OffsetSource */ 1122 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1123 ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer); /* OffsetDest */ 1124 1125 /* wait for room in fifo for pattern colordepth setup cmd if needed */ 1126 if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR; 1127 /* set pattern colordepth (writing 2 32bit words) */ 1128 nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETCOLORFORMAT, 1); 1129 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 1130 1131 /* wait for room in fifo for bitmap colordepth setup cmd if needed */ 1132 if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR; 1133 /* set bitmap colordepth (writing 2 32bit words) */ 1134 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_SETCOLORFORMAT, 1); 1135 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 1136 1137 /* Load our pattern into the engine: */ 1138 /* wait for room in fifo for pattern cmd if needed. */ 1139 if (nv_acc_fifofree_dma(7) != B_OK) return B_ERROR; 1140 /* now setup pattern (writing 7 32bit words) */ 1141 nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETSHAPE, 1); 1142 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000000; /* SetShape: 0 = 8x8, 1 = 64x1, 2 = 1x64 */ 1143 nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETCOLOR0, 4); 1144 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetColor0 */ 1145 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetColor1 */ 1146 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetPattern[0] */ 1147 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetPattern[1] */ 1148 1149 /* tell the engine to fetch and execute all (new) commands in the DMA buffer */ 1150 nv_start_dma(); 1151 1152 return B_OK; 1153 } 1154 1155 static void nv_init_for_3D_dma(void) 1156 { 1157 /* setup PGRAPH unknown registers and modify (pre-cleared) pipe stuff for 3D use */ 1158 if (si->ps.card_arch >= NV10A) 1159 { 1160 /* setup unknown PGRAPH stuff */ 1161 ACCW(PGWHAT_00, 0x00000000); 1162 ACCW(PGWHAT_01, 0x00000000); 1163 ACCW(PGWHAT_02, 0x00000000); 1164 ACCW(PGWHAT_03, 0x00000000); 1165 1166 ACCW(PGWHAT_04, 0x00001000); 1167 ACCW(PGWHAT_05, 0x00001000); 1168 ACCW(PGWHAT_06, 0x4003ff80); 1169 1170 ACCW(PGWHAT_07, 0x00000000); 1171 ACCW(PGWHAT_08, 0x00000000); 1172 ACCW(PGWHAT_09, 0x00000000); 1173 ACCW(PGWHAT_0A, 0x00000000); 1174 ACCW(PGWHAT_0B, 0x00000000); 1175 1176 ACCW(PGWHAT_0C, 0x00080008); 1177 ACCW(PGWHAT_0D, 0x00080008); 1178 1179 ACCW(PGWHAT_0E, 0x00000000); 1180 ACCW(PGWHAT_0F, 0x00000000); 1181 ACCW(PGWHAT_10, 0x00000000); 1182 ACCW(PGWHAT_11, 0x00000000); 1183 ACCW(PGWHAT_12, 0x00000000); 1184 ACCW(PGWHAT_13, 0x00000000); 1185 ACCW(PGWHAT_14, 0x00000000); 1186 ACCW(PGWHAT_15, 0x00000000); 1187 ACCW(PGWHAT_16, 0x00000000); 1188 ACCW(PGWHAT_17, 0x00000000); 1189 ACCW(PGWHAT_18, 0x00000000); 1190 1191 ACCW(PGWHAT_19, 0x10000000); 1192 1193 ACCW(PGWHAT_1A, 0x00000000); 1194 ACCW(PGWHAT_1B, 0x00000000); 1195 ACCW(PGWHAT_1C, 0x00000000); 1196 ACCW(PGWHAT_1D, 0x00000000); 1197 ACCW(PGWHAT_1E, 0x00000000); 1198 ACCW(PGWHAT_1F, 0x00000000); 1199 ACCW(PGWHAT_20, 0x00000000); 1200 ACCW(PGWHAT_21, 0x00000000); 1201 1202 ACCW(PGWHAT_22, 0x08000000); 1203 1204 ACCW(PGWHAT_23, 0x00000000); 1205 ACCW(PGWHAT_24, 0x00000000); 1206 ACCW(PGWHAT_25, 0x00000000); 1207 ACCW(PGWHAT_26, 0x00000000); 1208 1209 ACCW(PGWHAT_27, 0x4b7fffff); 1210 1211 ACCW(PGWHAT_28, 0x00000000); 1212 ACCW(PGWHAT_29, 0x00000000); 1213 ACCW(PGWHAT_2A, 0x00000000); 1214 1215 /* setup window clipping */ 1216 /* b0-11 = min; b16-27 = max. 1217 * note: 1218 * probably two's complement values, so setting to max range here: 1219 * which would be -2048 upto/including +2047. */ 1220 /* horizontal */ 1221 ACCW(WINCLIP_H_0, 0x07ff0800); 1222 ACCW(WINCLIP_H_1, 0x07ff0800); 1223 ACCW(WINCLIP_H_2, 0x07ff0800); 1224 ACCW(WINCLIP_H_3, 0x07ff0800); 1225 ACCW(WINCLIP_H_4, 0x07ff0800); 1226 ACCW(WINCLIP_H_5, 0x07ff0800); 1227 ACCW(WINCLIP_H_6, 0x07ff0800); 1228 ACCW(WINCLIP_H_7, 0x07ff0800); 1229 /* vertical */ 1230 ACCW(WINCLIP_V_0, 0x07ff0800); 1231 ACCW(WINCLIP_V_1, 0x07ff0800); 1232 ACCW(WINCLIP_V_2, 0x07ff0800); 1233 ACCW(WINCLIP_V_3, 0x07ff0800); 1234 ACCW(WINCLIP_V_4, 0x07ff0800); 1235 ACCW(WINCLIP_V_5, 0x07ff0800); 1236 ACCW(WINCLIP_V_6, 0x07ff0800); 1237 ACCW(WINCLIP_V_7, 0x07ff0800); 1238 1239 /* setup (initialize) pipe: 1240 * needed to get valid 3D rendering on (at least) NV1x cards. Without this 1241 * those cards produce rubbish instead of 3D, although the engine itself keeps 1242 * running and 2D stays OK. */ 1243 1244 /* set eyetype to local, lightning etc. is off */ 1245 ACCW(NV10_XFMOD0, 0x10000000); 1246 /* disable all lights */ 1247 ACCW(NV10_XFMOD1, 0x00000000); 1248 1249 /* note: upon writing data into the PIPEDAT register, the PIPEADR is 1250 * probably auto-incremented! */ 1251 /* (pipe adress = b2-16, pipe data = b0-31) */ 1252 /* note: pipe adresses IGRAPH registers? */ 1253 ACCW(NV10_PIPEADR, 0x00006740); 1254 ACCW(NV10_PIPEDAT, 0x00000000); 1255 ACCW(NV10_PIPEDAT, 0x00000000); 1256 ACCW(NV10_PIPEDAT, 0x00000000); 1257 ACCW(NV10_PIPEDAT, 0x3f800000); 1258 1259 ACCW(NV10_PIPEADR, 0x00006750); 1260 ACCW(NV10_PIPEDAT, 0x40000000); 1261 ACCW(NV10_PIPEDAT, 0x40000000); 1262 ACCW(NV10_PIPEDAT, 0x40000000); 1263 ACCW(NV10_PIPEDAT, 0x40000000); 1264 1265 ACCW(NV10_PIPEADR, 0x00006760); 1266 ACCW(NV10_PIPEDAT, 0x00000000); 1267 ACCW(NV10_PIPEDAT, 0x00000000); 1268 ACCW(NV10_PIPEDAT, 0x3f800000); 1269 ACCW(NV10_PIPEDAT, 0x00000000); 1270 1271 ACCW(NV10_PIPEADR, 0x00006770); 1272 ACCW(NV10_PIPEDAT, 0xc5000000); 1273 ACCW(NV10_PIPEDAT, 0xc5000000); 1274 ACCW(NV10_PIPEDAT, 0x00000000); 1275 ACCW(NV10_PIPEDAT, 0x00000000); 1276 1277 ACCW(NV10_PIPEADR, 0x00006780); 1278 ACCW(NV10_PIPEDAT, 0x00000000); 1279 ACCW(NV10_PIPEDAT, 0x00000000); 1280 ACCW(NV10_PIPEDAT, 0x3f800000); 1281 ACCW(NV10_PIPEDAT, 0x00000000); 1282 1283 ACCW(NV10_PIPEADR, 0x000067a0); 1284 ACCW(NV10_PIPEDAT, 0x3f800000); 1285 ACCW(NV10_PIPEDAT, 0x3f800000); 1286 ACCW(NV10_PIPEDAT, 0x3f800000); 1287 ACCW(NV10_PIPEDAT, 0x3f800000); 1288 1289 ACCW(NV10_PIPEADR, 0x00006ab0); 1290 ACCW(NV10_PIPEDAT, 0x3f800000); 1291 ACCW(NV10_PIPEDAT, 0x3f800000); 1292 ACCW(NV10_PIPEDAT, 0x3f800000); 1293 1294 ACCW(NV10_PIPEADR, 0x00006ac0); 1295 ACCW(NV10_PIPEDAT, 0x00000000); 1296 ACCW(NV10_PIPEDAT, 0x00000000); 1297 ACCW(NV10_PIPEDAT, 0x00000000); 1298 1299 ACCW(NV10_PIPEADR, 0x00006c10); 1300 ACCW(NV10_PIPEDAT, 0xbf800000); 1301 1302 ACCW(NV10_PIPEADR, 0x00007030); 1303 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1304 1305 ACCW(NV10_PIPEADR, 0x00007040); 1306 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1307 1308 ACCW(NV10_PIPEADR, 0x00007050); 1309 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1310 1311 ACCW(NV10_PIPEADR, 0x00007060); 1312 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1313 1314 ACCW(NV10_PIPEADR, 0x00007070); 1315 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1316 1317 ACCW(NV10_PIPEADR, 0x00007080); 1318 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1319 1320 ACCW(NV10_PIPEADR, 0x00007090); 1321 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1322 1323 ACCW(NV10_PIPEADR, 0x000070a0); 1324 ACCW(NV10_PIPEDAT, 0x7149f2ca); 1325 1326 ACCW(NV10_PIPEADR, 0x00006a80); 1327 ACCW(NV10_PIPEDAT, 0x00000000); 1328 ACCW(NV10_PIPEDAT, 0x00000000); 1329 ACCW(NV10_PIPEDAT, 0x3f800000); 1330 1331 ACCW(NV10_PIPEADR, 0x00006aa0); 1332 ACCW(NV10_PIPEDAT, 0x00000000); 1333 ACCW(NV10_PIPEDAT, 0x00000000); 1334 ACCW(NV10_PIPEDAT, 0x00000000); 1335 1336 ACCW(NV10_PIPEADR, 0x00000040); 1337 ACCW(NV10_PIPEDAT, 0x00000005); 1338 1339 ACCW(NV10_PIPEADR, 0x00006400); 1340 ACCW(NV10_PIPEDAT, 0x3f800000); 1341 ACCW(NV10_PIPEDAT, 0x3f800000); 1342 ACCW(NV10_PIPEDAT, 0x4b7fffff); 1343 ACCW(NV10_PIPEDAT, 0x00000000); 1344 1345 ACCW(NV10_PIPEADR, 0x00006410); 1346 ACCW(NV10_PIPEDAT, 0xc5000000); 1347 ACCW(NV10_PIPEDAT, 0xc5000000); 1348 ACCW(NV10_PIPEDAT, 0x00000000); 1349 ACCW(NV10_PIPEDAT, 0x00000000); 1350 1351 ACCW(NV10_PIPEADR, 0x00006420); 1352 ACCW(NV10_PIPEDAT, 0x00000000); 1353 ACCW(NV10_PIPEDAT, 0x00000000); 1354 ACCW(NV10_PIPEDAT, 0x00000000); 1355 ACCW(NV10_PIPEDAT, 0x00000000); 1356 1357 ACCW(NV10_PIPEADR, 0x00006430); 1358 ACCW(NV10_PIPEDAT, 0x00000000); 1359 ACCW(NV10_PIPEDAT, 0x00000000); 1360 ACCW(NV10_PIPEDAT, 0x00000000); 1361 ACCW(NV10_PIPEDAT, 0x00000000); 1362 1363 ACCW(NV10_PIPEADR, 0x000064c0); 1364 ACCW(NV10_PIPEDAT, 0x3f800000); 1365 ACCW(NV10_PIPEDAT, 0x3f800000); 1366 ACCW(NV10_PIPEDAT, 0x477fffff); 1367 ACCW(NV10_PIPEDAT, 0x3f800000); 1368 1369 ACCW(NV10_PIPEADR, 0x000064d0); 1370 ACCW(NV10_PIPEDAT, 0xc5000000); 1371 ACCW(NV10_PIPEDAT, 0xc5000000); 1372 ACCW(NV10_PIPEDAT, 0x00000000); 1373 ACCW(NV10_PIPEDAT, 0x00000000); 1374 1375 ACCW(NV10_PIPEADR, 0x000064e0); 1376 ACCW(NV10_PIPEDAT, 0xc4fff000); 1377 ACCW(NV10_PIPEDAT, 0xc4fff000); 1378 ACCW(NV10_PIPEDAT, 0x00000000); 1379 ACCW(NV10_PIPEDAT, 0x00000000); 1380 1381 ACCW(NV10_PIPEADR, 0x000064f0); 1382 ACCW(NV10_PIPEDAT, 0x00000000); 1383 ACCW(NV10_PIPEDAT, 0x00000000); 1384 ACCW(NV10_PIPEDAT, 0x00000000); 1385 ACCW(NV10_PIPEDAT, 0x00000000); 1386 1387 /* turn lightning on */ 1388 ACCW(NV10_XFMOD0, 0x30000000); 1389 /* set light 1 to infinite type, other lights remain off */ 1390 ACCW(NV10_XFMOD1, 0x00000004); 1391 1392 /* Z-buffer state is: 1393 * initialized, set to: 'fixed point' (integer?); Z-buffer; 16bits depth */ 1394 /* note: 1395 * other options possible are: floating point; 24bits depth; W-buffer */ 1396 ACCW(GLOB_STAT_0, 0x10000000); 1397 /* set DMA instance 2 and 3 to be invalid */ 1398 ACCW(GLOB_STAT_1, 0x00000000); 1399 } 1400 } 1401 1402 static void nv_start_dma(void) 1403 { 1404 uint32 dummy; 1405 1406 if (si->engine.dma.current != si->engine.dma.put) 1407 { 1408 si->engine.dma.put = si->engine.dma.current; 1409 /* flush used caches so we know for sure the DMA cmd buffer received all data. */ 1410 if (si->ps.card_arch < NV40A) 1411 { 1412 /* some CPU's support out-of-order processing (WinChip/Cyrix). Flush them. */ 1413 __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory"); 1414 /* read a non-cached adress to flush the cash */ 1415 dummy = ACCR(STATUS); 1416 } 1417 else 1418 { 1419 /* dummy read the first adress of the framebuffer to flush MTRR-WC buffers */ 1420 dummy = *((volatile uint32 *)(si->framebuffer)); 1421 } 1422 1423 /* actually start DMA to execute all commands now in buffer */ 1424 /* note: 1425 * it doesn't matter which FIFO channel's DMA registers we access, they are in 1426 * fact all the same set. It also doesn't matter if the channel was assigned a 1427 * command or not. */ 1428 /* note also: 1429 * NV_GENERAL_DMAPUT is a write-only register on some cards (confirmed NV11). */ 1430 NV_REG32(NVACC_FIFO + NV_GENERAL_DMAPUT) = (si->engine.dma.put << 2); 1431 } 1432 } 1433 1434 /* this routine does not check the engine's internal hardware FIFO, but the DMA 1435 * command buffer. You can see this as a FIFO as well, that feeds the hardware FIFO. 1436 * The hardware FIFO state is checked by the DMA hardware automatically. */ 1437 static status_t nv_acc_fifofree_dma(uint16 cmd_size) 1438 { 1439 uint32 dmaget; 1440 1441 /* we'd better check for timeouts on the DMA engine as it's theoretically 1442 * breakable by malfunctioning software */ 1443 uint16 cnt = 0; 1444 1445 /* check if the DMA buffer has enough room for the command. 1446 * note: 1447 * engine.dma.free is 'cached' */ 1448 while ((si->engine.dma.free < cmd_size) && (cnt < 10000) && (err < 3)) 1449 { 1450 /* see where the engine is currently fetching from the buffer */ 1451 /* note: 1452 * read this only once in the code as accessing registers is relatively slow */ 1453 /* note also: 1454 * it doesn't matter which FIFO channel's DMA registers we access, they are in 1455 * fact all the same set. It also doesn't matter if the channel was assigned a 1456 * command or not. */ 1457 dmaget = ((NV_REG32(NVACC_FIFO + NV_GENERAL_DMAGET)) >> 2); 1458 1459 /* update timeout counter: on NV11 on a Pentium4 2.8Ghz max reached count 1460 * using BeRoMeter 1.2.6 was about 600; so counting 10000 before generating 1461 * a timeout should definately do it. Snooze()-ing cannot be done without a 1462 * serious speed penalty, even if done for only 1 microSecond. */ 1463 cnt++; 1464 1465 /* where's the engine fetching viewed from us issuing? */ 1466 if (si->engine.dma.put >= dmaget) 1467 { 1468 /* engine is fetching 'behind us', the last piece of the buffer is free */ 1469 1470 /* note the 'updated' free space we have in the DMA buffer */ 1471 si->engine.dma.free = si->engine.dma.max - si->engine.dma.current; 1472 /* if it's enough after all we exit this routine immediately. Else: */ 1473 if (si->engine.dma.free < cmd_size) 1474 { 1475 /* not enough room left, so instruct DMA engine to reset the buffer 1476 * when it's reaching the end of it */ 1477 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x20000000; 1478 /* reset our buffer pointer, so new commands will be placed at the 1479 * beginning of the buffer. */ 1480 si->engine.dma.current = 0; 1481 /* tell the engine to fetch the remaining command(s) in the DMA buffer 1482 * that where not executed before. */ 1483 nv_start_dma(); 1484 1485 /* NOW the engine is fetching 'in front of us', so the first piece 1486 * of the buffer is free */ 1487 1488 /* note the updated current free space we have in the DMA buffer */ 1489 si->engine.dma.free = dmaget - si->engine.dma.current; 1490 /* mind this pittfall: 1491 * Leave some room between where the engine is fetching and where we 1492 * put new commands. Otherwise the engine will crash on heavy loads. 1493 * A crash can be forced best in 640x480x32 mode with BeRoMeter 1.2.6. 1494 * (confirmed on NV11 and NV43 with less than 256 words forced freespace.) 1495 * Note: 1496 * The engine is DMA triggered for fetching chunks every 128 bytes, 1497 * maybe this is the reason for this behaviour. 1498 * Note also: 1499 * it looks like the space that needs to be kept free is coupled 1500 * with the size of the DMA buffer. */ 1501 if (si->engine.dma.free < 256) 1502 si->engine.dma.free = 0; 1503 else 1504 si->engine.dma.free -= 256; 1505 } 1506 } 1507 else 1508 { 1509 /* engine is fetching 'in front of us', so the first piece of the buffer 1510 * is free */ 1511 1512 /* note the updated current free space we have in the DMA buffer */ 1513 si->engine.dma.free = dmaget - si->engine.dma.current; 1514 /* mind this pittfall: 1515 * Leave some room between where the engine is fetching and where we 1516 * put new commands. Otherwise the engine will crash on heavy loads. 1517 * A crash can be forced best in 640x480x32 mode with BeRoMeter 1.2.6. 1518 * (confirmed on NV11 and NV43 with less than 256 words forced freespace.) 1519 * Note: 1520 * The engine is DMA triggered for fetching chunks every 128 bytes, 1521 * maybe this is the reason for this behaviour. 1522 * Note also: 1523 * it looks like the space that needs to be kept free is coupled 1524 * with the size of the DMA buffer. */ 1525 if (si->engine.dma.free < 256) 1526 si->engine.dma.free = 0; 1527 else 1528 si->engine.dma.free -= 256; 1529 } 1530 } 1531 1532 /* log timeout if we had one */ 1533 if (cnt == 10000) 1534 { 1535 if (err < 3) err++; 1536 LOG(4,("ACC_DMA: fifofree; DMA timeout #%d, engine trouble!\n", err)); 1537 } 1538 1539 /* we must make the acceleration routines abort or the driver will hang! */ 1540 if (err >= 3) return B_ERROR; 1541 1542 return B_OK; 1543 } 1544 1545 static void nv_acc_cmd_dma(uint32 cmd, uint16 offset, uint16 size) 1546 { 1547 /* NV_FIFO_DMA_OPCODE: set number of cmd words (b18 - 28); set FIFO offset for 1548 * first cmd word (b2 - 15); set DMA opcode = method (b29 - 31). 1549 * a 'NOP' is the opcode word $00000000. */ 1550 /* note: 1551 * possible DMA opcodes: 1552 * b'000' is 'method' (execute cmd); 1553 * b'001' is 'jump'; 1554 * b'002' is 'noninc method' (execute buffer wrap-around); 1555 * b'003' is 'call': return is executed by opcode word $00020000 (b17 = 1). */ 1556 /* note also: 1557 * this system uses auto-increments for the FIFO offset adresses. Make sure 1558 * to set a new adress if a gap exists between the previous one and the new one. */ 1559 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = ((size << 18) | 1560 ((si->engine.fifo.ch_ptr[cmd] + offset) & 0x0000fffc)); 1561 1562 /* space left after issuing the current command is the cmd AND it's arguments less */ 1563 si->engine.dma.free -= (size + 1); 1564 } 1565 1566 static void nv_acc_set_ch_dma(uint16 ch, uint32 handle) 1567 { 1568 /* issue FIFO channel assign cmd */ 1569 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = ((1 << 18) | ch); 1570 /* set new assignment */ 1571 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = (0x80000000 | handle); 1572 1573 /* space left after issuing the current command is the cmd AND it's arguments less */ 1574 si->engine.dma.free -= 2; 1575 } 1576 1577 /* note: 1578 * switching fifo channel assignments this way has no noticable slowdown: 1579 * measured 0.2% with Quake2. */ 1580 void nv_acc_assert_fifo_dma(void) 1581 { 1582 /* does every engine cmd this accelerant needs have a FIFO channel? */ 1583 //fixme: can probably be optimized for both speed and channel selection... 1584 if (!si->engine.fifo.ch_ptr[NV_ROP5_SOLID] || 1585 !si->engine.fifo.ch_ptr[NV_IMAGE_BLACK_RECTANGLE] || 1586 !si->engine.fifo.ch_ptr[NV_IMAGE_PATTERN] || 1587 !si->engine.fifo.ch_ptr[NV4_SURFACE] || 1588 !si->engine.fifo.ch_ptr[NV_IMAGE_BLIT] || 1589 !si->engine.fifo.ch_ptr[NV4_GDI_RECTANGLE_TEXT] || 1590 !si->engine.fifo.ch_ptr[NV_SCALED_IMAGE_FROM_MEMORY]) 1591 { 1592 uint16 cnt; 1593 1594 /* free the FIFO channels we want from the currently assigned cmd's */ 1595 si->engine.fifo.ch_ptr[si->engine.fifo.handle[0]] = 0; 1596 si->engine.fifo.ch_ptr[si->engine.fifo.handle[1]] = 0; 1597 si->engine.fifo.ch_ptr[si->engine.fifo.handle[2]] = 0; 1598 si->engine.fifo.ch_ptr[si->engine.fifo.handle[3]] = 0; 1599 si->engine.fifo.ch_ptr[si->engine.fifo.handle[4]] = 0; 1600 si->engine.fifo.ch_ptr[si->engine.fifo.handle[5]] = 0; 1601 si->engine.fifo.ch_ptr[si->engine.fifo.handle[6]] = 0; 1602 1603 /* set new object handles */ 1604 si->engine.fifo.handle[0] = NV_ROP5_SOLID; 1605 si->engine.fifo.handle[1] = NV_IMAGE_BLACK_RECTANGLE; 1606 si->engine.fifo.handle[2] = NV_IMAGE_PATTERN; 1607 si->engine.fifo.handle[3] = NV4_SURFACE; 1608 si->engine.fifo.handle[4] = NV_IMAGE_BLIT; 1609 si->engine.fifo.handle[5] = NV4_GDI_RECTANGLE_TEXT; 1610 si->engine.fifo.handle[6] = NV_SCALED_IMAGE_FROM_MEMORY; 1611 1612 /* set handle's pointers to their assigned FIFO channels */ 1613 /* note: 1614 * b0-1 aren't used as adressbits. Using b0 to indicate a valid pointer. */ 1615 for (cnt = 0; cnt < 0x08; cnt++) 1616 { 1617 si->engine.fifo.ch_ptr[(si->engine.fifo.handle[cnt])] = 1618 (0x00000001 + (cnt * 0x00002000)); 1619 } 1620 1621 /* wait for room in fifo for new FIFO assigment cmds if needed. */ 1622 if (nv_acc_fifofree_dma(14) != B_OK) return; 1623 1624 /* program new FIFO assignments */ 1625 /* Raster OPeration: */ 1626 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH0, si->engine.fifo.handle[0]); 1627 /* Clip: */ 1628 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH1, si->engine.fifo.handle[1]); 1629 /* Pattern: */ 1630 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH2, si->engine.fifo.handle[2]); 1631 /* 2D Surface: */ 1632 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH3, si->engine.fifo.handle[3]); 1633 /* Blit: */ 1634 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH4, si->engine.fifo.handle[4]); 1635 /* Bitmap: */ 1636 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH5, si->engine.fifo.handle[5]); 1637 /* Scaled and fitered Blit: */ 1638 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH6, si->engine.fifo.handle[6]); 1639 1640 /* tell the engine to fetch and execute all (new) commands in the DMA buffer */ 1641 nv_start_dma(); 1642 } 1643 } 1644 1645 /* 1646 note: 1647 moved acceleration 'top-level' routines to be integrated in the engine: 1648 it is costly to call the engine for every single function within a loop! 1649 (measured with BeRoMeter 1.2.6: upto 15% speed increase on all CPU's.) 1650 1651 note also: 1652 splitting up each command list into sublists (see routines below) prevents 1653 a lot more nested calls, further increasing the speed with upto 70%. 1654 1655 finally: 1656 sending the sublist to just one single engine command even further increases 1657 speed with upto another 10%. This can't be done for blits though, as this engine- 1658 command's hardware does not support multiple objects. 1659 */ 1660 1661 /* screen to screen blit - i.e. move windows around and scroll within them. */ 1662 void SCREEN_TO_SCREEN_BLIT_DMA(engine_token *et, blit_params *list, uint32 count) 1663 { 1664 uint32 i = 0; 1665 uint16 subcnt; 1666 1667 /*** init acc engine for blit function ***/ 1668 /* ROP registers (Raster OPeration): 1669 * wait for room in fifo for ROP cmd if needed. */ 1670 if (nv_acc_fifofree_dma(2) != B_OK) return; 1671 /* now setup ROP (writing 2 32bit words) for GXcopy */ 1672 nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); 1673 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xcc; /* SetRop5 */ 1674 1675 /*** do each blit ***/ 1676 /* Note: 1677 * blit-copy direction is determined inside nvidia hardware: no setup needed */ 1678 while (count) 1679 { 1680 /* break up the list in sublists to minimize calls, while making sure long 1681 * lists still get executed without trouble */ 1682 subcnt = 32; 1683 if (count < 32) subcnt = count; 1684 count -= subcnt; 1685 1686 /* wait for room in fifo for blit cmd if needed. */ 1687 if (nv_acc_fifofree_dma(4 * subcnt) != B_OK) return; 1688 1689 while (subcnt--) 1690 { 1691 /* now setup blit (writing 4 32bit words) */ 1692 nv_acc_cmd_dma(NV_IMAGE_BLIT, NV_IMAGE_BLIT_SOURCEORG, 3); 1693 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1694 (((list[i].src_top) << 16) | (list[i].src_left)); /* SourceOrg */ 1695 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1696 (((list[i].dest_top) << 16) | (list[i].dest_left)); /* DestOrg */ 1697 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1698 ((((list[i].height) + 1) << 16) | ((list[i].width) + 1)); /* HeightWidth */ 1699 1700 i++; 1701 } 1702 1703 /* tell the engine to fetch the commands in the DMA buffer that where not 1704 * executed before. */ 1705 nv_start_dma(); 1706 } 1707 1708 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 1709 si->engine.threeD.reload = 0xffffffff; 1710 } 1711 1712 /* scaled and filtered screen to screen blit - i.e. video playback without overlay */ 1713 /* note: source and destination may not overlap. */ 1714 //fixme? checkout NV5 and NV10 version of cmd: faster?? (or is 0x77 a 'autoselect' version?) 1715 void SCREEN_TO_SCREEN_SCALED_FILTERED_BLIT_DMA(engine_token *et, scaled_blit_params *list, uint32 count) 1716 { 1717 uint32 i = 0; 1718 uint16 subcnt; 1719 uint32 cmd_depth; 1720 uint8 bpp; 1721 1722 /*** init acc engine for scaled filtered blit function ***/ 1723 /* Set pixel width */ 1724 switch(si->dm.space) 1725 { 1726 case B_RGB15_LITTLE: 1727 cmd_depth = 0x00000002; 1728 bpp = 2; 1729 break; 1730 case B_RGB16_LITTLE: 1731 cmd_depth = 0x00000007; 1732 bpp = 2; 1733 break; 1734 case B_RGB32_LITTLE: 1735 case B_RGBA32_LITTLE: 1736 cmd_depth = 0x00000004; 1737 bpp = 4; 1738 break; 1739 /* fixme sometime: 1740 * we could do the spaces below if this function would be modified to be able 1741 * to use a source outside of the desktop, i.e. using offscreen bitmaps... */ 1742 case B_YCbCr422: 1743 cmd_depth = 0x00000005; 1744 bpp = 2; 1745 break; 1746 case B_YUV422: 1747 cmd_depth = 0x00000006; 1748 bpp = 2; 1749 break; 1750 default: 1751 /* note: this function does not support src or dest in the B_CMAP8 space! */ 1752 //fixme: the NV10 version of this cmd supports B_CMAP8 src though... (checkout) 1753 LOG(8,("ACC_DMA: scaled_filtered_blit, invalid bit depth\n")); 1754 return; 1755 } 1756 1757 /* modify surface depth settings for 15-bit colorspace so command works as intended */ 1758 if (si->dm.space == B_RGB15_LITTLE) 1759 { 1760 /* wait for room in fifo for surface setup cmd if needed */ 1761 if (nv_acc_fifofree_dma(2) != B_OK) return; 1762 /* now setup 2D surface (writing 1 32bit word) */ 1763 nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 1); 1764 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000002; /* Format */ 1765 } 1766 1767 /* TNT1 has fixed operation mode 'SRCcopy' while the rest can be programmed: */ 1768 if (si->ps.card_type != NV04) 1769 { 1770 /* wait for room in fifo for cmds if needed. */ 1771 if (nv_acc_fifofree_dma(5) != B_OK) return; 1772 /* now setup source bitmap colorspace */ 1773 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SETCOLORFORMAT, 2); 1774 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 1775 /* now setup operation mode to SRCcopy */ 1776 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000003; /* SetOperation */ 1777 } 1778 else 1779 { 1780 /* wait for room in fifo for cmd if needed. */ 1781 if (nv_acc_fifofree_dma(4) != B_OK) return; 1782 /* now setup source bitmap colorspace */ 1783 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SETCOLORFORMAT, 1); 1784 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 1785 /* TNT1 has fixed operation mode SRCcopy */ 1786 } 1787 /* now setup fill color (writing 2 32bit words) */ 1788 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); 1789 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000000; /* Color1A */ 1790 1791 /*** do each blit ***/ 1792 while (count) 1793 { 1794 /* break up the list in sublists to minimize calls, while making sure long 1795 * lists still get executed without trouble */ 1796 subcnt = 16; 1797 if (count < 16) subcnt = count; 1798 count -= subcnt; 1799 1800 /* wait for room in fifo for blit cmd if needed. */ 1801 if (nv_acc_fifofree_dma(12 * subcnt) != B_OK) return; 1802 1803 while (subcnt--) 1804 { 1805 /* now setup blit (writing 12 32bit words) */ 1806 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCEORG, 6); 1807 /* setup dest clipping ref for blit (not used) (b0-15 = left, b16-31 = top) */ 1808 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0; /* SourceOrg */ 1809 /* setup dest clipping size for blit */ 1810 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1811 (((list[i].dest_height + 1) << 16) | (list[i].dest_width + 1)); /* SourceHeightWidth */ 1812 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1813 /* setup destination location and size for blit */ 1814 (((list[i].dest_top) << 16) | (list[i].dest_left)); /* DestOrg */ 1815 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1816 (((list[i].dest_height + 1) << 16) | (list[i].dest_width + 1)); /* DestHeightWidth */ 1817 //fixme: findout scaling limits... (although the current cmd interface doesn't support them.) 1818 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1819 (((list[i].src_width + 1) << 20) / (list[i].dest_width + 1)); /* HorInvScale (in 12.20 format) */ 1820 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1821 (((list[i].src_height + 1) << 20) / (list[i].dest_height + 1)); /* VerInvScale (in 12.20 format) */ 1822 1823 nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCESIZE, 4); 1824 /* setup horizontal and vertical source (fetching) ends. 1825 * note: 1826 * horizontal granularity is 2 pixels, vertical granularity is 1 pixel. 1827 * look at Matrox or Neomagic bes engines code for usage example. */ 1828 //fixme: tested 15, 16 and 32-bit RGB depth, verify other depths... 1829 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1830 (((list[i].src_height + 1) << 16) | 1831 (((list[i].src_width + 1) + 0x0001) & ~0x0001)); /* SourceHeightWidth */ 1832 /* setup source pitch (b0-15). Set 'format origin center' (b16-17) and 1833 * select 'format interpolator foh (bilinear filtering)' (b24). */ 1834 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1835 (si->fbc.bytes_per_row | (1 << 16) | (1 << 24)); /* SourcePitch */ 1836 /* setup source surface location */ 1837 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1838 ((uint32)((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)) + 1839 (list[i].src_top * si->fbc.bytes_per_row) + (list[i].src_left * bpp); /* Offset */ 1840 /* setup source start: first (sub)pixel contributing to output picture */ 1841 /* note: 1842 * clipping is not asked for. 1843 * look at nVidia NV10+ bes engine code for useage example. */ 1844 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1845 0; /* SourceRef (b0-15 = hor, b16-31 = ver: both in 12.4 format) */ 1846 1847 i++; 1848 } 1849 1850 /* tell the engine to fetch the commands in the DMA buffer that where not 1851 * executed before. */ 1852 nv_start_dma(); 1853 } 1854 1855 /* reset surface depth settings so the other engine commands works as intended */ 1856 if (si->dm.space == B_RGB15_LITTLE) 1857 { 1858 /* wait for room in fifo for surface setup cmd if needed */ 1859 if (nv_acc_fifofree_dma(2) != B_OK) return; 1860 /* now setup 2D surface (writing 1 32bit word) */ 1861 nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 1); 1862 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000004; /* Format */ 1863 1864 /* tell the engine to fetch the commands in the DMA buffer that where not 1865 * executed before. */ 1866 nv_start_dma(); 1867 } 1868 1869 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 1870 si->engine.threeD.reload = 0xffffffff; 1871 } 1872 1873 /* rectangle fill - i.e. workspace and window background color */ 1874 void FILL_RECTANGLE_DMA(engine_token *et, uint32 colorIndex, fill_rect_params *list, uint32 count) 1875 { 1876 uint32 i = 0; 1877 uint16 subcnt; 1878 1879 /*** init acc engine for fill function ***/ 1880 /* ROP registers (Raster OPeration): 1881 * wait for room in fifo for ROP and bitmap cmd if needed. */ 1882 if (nv_acc_fifofree_dma(4) != B_OK) return; 1883 /* now setup ROP (writing 2 32bit words) for GXcopy */ 1884 nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); 1885 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xcc; /* SetRop5 */ 1886 /* now setup fill color (writing 2 32bit words) */ 1887 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); 1888 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = colorIndex; /* Color1A */ 1889 1890 /*** draw each rectangle ***/ 1891 while (count) 1892 { 1893 /* break up the list in sublists to minimize calls, while making sure long 1894 * lists still get executed without trouble */ 1895 subcnt = 32; 1896 if (count < 32) subcnt = count; 1897 count -= subcnt; 1898 1899 /* wait for room in fifo for bitmap cmd if needed. */ 1900 if (nv_acc_fifofree_dma(1 + (2 * subcnt)) != B_OK) return; 1901 1902 /* issue fill command once... */ 1903 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, (2 * subcnt)); 1904 /* ... and send multiple rects (engine cmd supports 32 max) */ 1905 while (subcnt--) 1906 { 1907 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1908 (((list[i].left) << 16) | ((list[i].top) & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */ 1909 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1910 (((((list[i].right)+1) - (list[i].left)) << 16) | 1911 (((list[i].bottom-list[i].top)+1) & 0x0000ffff)); /* Unclipped Rect 0 WidthHeight */ 1912 1913 i++; 1914 } 1915 1916 /* tell the engine to fetch the commands in the DMA buffer that where not 1917 * executed before. */ 1918 nv_start_dma(); 1919 } 1920 1921 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 1922 si->engine.threeD.reload = 0xffffffff; 1923 } 1924 1925 /* span fill - i.e. (selected) menuitem background color (Dano) */ 1926 void FILL_SPAN_DMA(engine_token *et, uint32 colorIndex, uint16 *list, uint32 count) 1927 { 1928 uint32 i = 0; 1929 uint16 subcnt; 1930 1931 /*** init acc engine for fill function ***/ 1932 /* ROP registers (Raster OPeration): 1933 * wait for room in fifo for ROP and bitmap cmd if needed. */ 1934 if (nv_acc_fifofree_dma(4) != B_OK) return; 1935 /* now setup ROP (writing 2 32bit words) for GXcopy */ 1936 nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); 1937 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xcc; /* SetRop5 */ 1938 /* now setup fill color (writing 2 32bit words) */ 1939 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); 1940 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = colorIndex; /* Color1A */ 1941 1942 /*** draw each span ***/ 1943 while (count) 1944 { 1945 /* break up the list in sublists to minimize calls, while making sure long 1946 * lists still get executed without trouble */ 1947 subcnt = 32; 1948 if (count < 32) subcnt = count; 1949 count -= subcnt; 1950 1951 /* wait for room in fifo for bitmap cmd if needed. */ 1952 if (nv_acc_fifofree_dma(1 + (2 * subcnt)) != B_OK) return; 1953 1954 /* issue fill command once... */ 1955 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, (2 * subcnt)); 1956 /* ... and send multiple rects (spans) (engine cmd supports 32 max) */ 1957 while (subcnt--) 1958 { 1959 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1960 (((list[i+1]) << 16) | ((list[i]) & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */ 1961 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 1962 ((((list[i+2]+1) - (list[i+1])) << 16) | 0x00000001); /* Unclipped Rect 0 WidthHeight */ 1963 1964 i+=3; 1965 } 1966 1967 /* tell the engine to fetch the commands in the DMA buffer that where not 1968 * executed before. */ 1969 nv_start_dma(); 1970 } 1971 1972 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 1973 si->engine.threeD.reload = 0xffffffff; 1974 } 1975 1976 /* rectangle invert - i.e. text cursor and text selection */ 1977 void INVERT_RECTANGLE_DMA(engine_token *et, fill_rect_params *list, uint32 count) 1978 { 1979 uint32 i = 0; 1980 uint16 subcnt; 1981 1982 /*** init acc engine for invert function ***/ 1983 /* ROP registers (Raster OPeration): 1984 * wait for room in fifo for ROP and bitmap cmd if needed. */ 1985 if (nv_acc_fifofree_dma(4) != B_OK) return; 1986 /* now setup ROP (writing 2 32bit words) for GXinvert */ 1987 nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); 1988 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x55; /* SetRop5 */ 1989 /* now reset fill color (writing 2 32bit words) */ 1990 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); 1991 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000000; /* Color1A */ 1992 1993 /*** invert each rectangle ***/ 1994 while (count) 1995 { 1996 /* break up the list in sublists to minimize calls, while making sure long 1997 * lists still get executed without trouble */ 1998 subcnt = 32; 1999 if (count < 32) subcnt = count; 2000 count -= subcnt; 2001 2002 /* wait for room in fifo for bitmap cmd if needed. */ 2003 if (nv_acc_fifofree_dma(1 + (2 * subcnt)) != B_OK) return; 2004 2005 /* issue fill command once... */ 2006 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, (2 * subcnt)); 2007 /* ... and send multiple rects (engine cmd supports 32 max) */ 2008 while (subcnt--) 2009 { 2010 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2011 (((list[i].left) << 16) | ((list[i].top) & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */ 2012 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 2013 (((((list[i].right)+1) - (list[i].left)) << 16) | 2014 (((list[i].bottom-list[i].top)+1) & 0x0000ffff)); /* Unclipped Rect 0 WidthHeight */ 2015 2016 i++; 2017 } 2018 2019 /* tell the engine to fetch the commands in the DMA buffer that where not 2020 * executed before. */ 2021 nv_start_dma(); 2022 } 2023 2024 /* tell 3D add-ons that they should reload their rendering states and surfaces */ 2025 si->engine.threeD.reload = 0xffffffff; 2026 } 2027