1 /* NV Acceleration functions */ 2 3 /* Author: 4 Rudolf Cornelissen 8/2003-1/2005. 5 6 This code was possible thanks to: 7 - the Linux XFree86 NV driver, 8 - the Linux UtahGLX 3D driver. 9 */ 10 11 /* 12 note: 13 attempting DMA because without it I can't get NV40 and higher going ATM. 14 Maybe later we can forget about the non-DMA version: that depends on 15 3D acceleration attempts). 16 */ 17 18 #define MODULE_BIT 0x00080000 19 20 #include "nv_std.h" 21 22 /*acceleration notes*/ 23 24 /*functions Be's app_server uses: 25 fill span (horizontal only) 26 fill rectangle (these 2 are very similar) 27 invert rectangle 28 blit 29 */ 30 31 static void nv_start_dma(void); 32 static status_t nv_acc_fifofree_dma(uint16 cmd_size); 33 static void nv_acc_cmd_dma(uint32 cmd, uint16 offset, uint16 size); 34 static void nv_acc_set_ch_dma(uint16 ch, uint32 handle); 35 36 /* used to track engine DMA stalls */ 37 static uint8 err; 38 39 /* wait until engine completely idle */ 40 status_t nv_acc_wait_idle_dma() 41 { 42 /* we'd better check for timeouts on the DMA engine as it's theoretically 43 * breakable by malfunctioning software */ 44 uint16 cnt = 0; 45 46 /* wait until all upcoming commands are in execution at least. Do this until 47 * we hit a timeout; abort if we failed at least three times before: 48 * if DMA stalls, we have to forget about it alltogether at some point, or 49 * the system will almost come to a complete halt.. */ 50 /* note: 51 * it doesn't matter which FIFO channel's DMA registers we access, they are in 52 * fact all the same set. It also doesn't matter if the channel was assigned a 53 * command or not. */ 54 while ((NV_REG32(NVACC_FIFO + NV_GENERAL_DMAGET) != (si->engine.dma.put << 2)) && 55 (cnt < 10000) && (err < 3)) 56 { 57 /* snooze a bit so I do not hammer the bus */ 58 snooze (100); 59 cnt++; 60 } 61 62 /* log timeout if we had one */ 63 if (cnt == 10000) 64 { 65 if (err < 3) err++; 66 LOG(4,("ACC_DMA: wait_idle; DMA timeout #%d, engine trouble!\n", err)); 67 } 68 69 /* wait until execution completed */ 70 while (ACCR(STATUS)) 71 { 72 /* snooze a bit so I do not hammer the bus */ 73 snooze (100); 74 } 75 76 return B_OK; 77 } 78 79 /* AFAIK this must be done for every new screenmode. 80 * Engine required init. */ 81 status_t nv_acc_init_dma() 82 { 83 uint16 cnt; 84 uint32 surf_depth, cmd_depth; 85 /* reset the engine DMA stalls counter */ 86 err = 0; 87 88 /* a hanging engine only recovers from a complete power-down/power-up cycle */ 89 NV_REG32(NV32_PWRUPCTRL) = 0x13110011; 90 snooze(1000); 91 NV_REG32(NV32_PWRUPCTRL) = 0x13111111; 92 93 /* setup PTIMER: */ 94 //fixme? how about NV28 setup as just after coldstarting? (see nv_info.c) 95 /* set timer numerator to 8 (in b0-15) */ 96 ACCW(PT_NUMERATOR, 0x00000008); 97 /* set timer denominator to 3 (in b0-15) */ 98 ACCW(PT_DENOMINATR, 0x00000003); 99 100 /* disable timer-alarm INT requests (b0) */ 101 ACCW(PT_INTEN, 0x00000000); 102 /* reset timer-alarm INT status bit (b0) */ 103 ACCW(PT_INTSTAT, 0xffffffff); 104 105 /* enable PRAMIN write access on pre NV10 before programming it! */ 106 if (si->ps.card_arch == NV04A) 107 { 108 /* set framebuffer config: type = notiling, PRAMIN write access enabled */ 109 NV_REG32(NV32_PFB_CONFIG_0) = 0x00001114; 110 } 111 else 112 { 113 /* setup acc engine 'source' tile adressranges */ 114 ACCW(NV10_FBTIL0AD, 0); 115 ACCW(NV10_FBTIL1AD, 0); 116 ACCW(NV10_FBTIL2AD, 0); 117 ACCW(NV10_FBTIL3AD, 0); 118 ACCW(NV10_FBTIL4AD, 0); 119 ACCW(NV10_FBTIL5AD, 0); 120 ACCW(NV10_FBTIL6AD, 0); 121 ACCW(NV10_FBTIL7AD, 0); 122 ACCW(NV10_FBTIL0ED, (si->ps.memory_size - 1)); 123 ACCW(NV10_FBTIL1ED, (si->ps.memory_size - 1)); 124 ACCW(NV10_FBTIL2ED, (si->ps.memory_size - 1)); 125 ACCW(NV10_FBTIL3ED, (si->ps.memory_size - 1)); 126 ACCW(NV10_FBTIL4ED, (si->ps.memory_size - 1)); 127 ACCW(NV10_FBTIL5ED, (si->ps.memory_size - 1)); 128 ACCW(NV10_FBTIL6ED, (si->ps.memory_size - 1)); 129 ACCW(NV10_FBTIL7ED, (si->ps.memory_size - 1)); 130 } 131 132 /*** PRAMIN ***/ 133 /* first clear the entire RAMHT (hash-table) space to a defined state. It turns 134 * out at least NV11 will keep the previously programmed handles over resets and 135 * power-outages upto about 15 seconds!! Faulty entries might well hang the 136 * engine (confirmed on NV11). 137 * Note: 138 * this behaviour is not very strange: even very old DRAM chips are known to be 139 * able to do this, even though you should refresh them every few milliseconds or 140 * so. (Large memory cell capacitors, though different cells vary a lot in their 141 * capacity.) 142 * Of course data validity is not certain by a long shot over this large 143 * amount of time.. */ 144 for(cnt = 0; cnt < 0x0400; cnt++) 145 NV_REG32(NVACC_HT_HANDL_00 + (cnt << 2)) = 0; 146 /* RAMHT (hash-table) space SETUP FIFO HANDLES */ 147 /* note: 148 * 'instance' tells you where the engine command is stored in 'PR_CTXx_x' sets 149 * below: instance being b4-19 with baseadress NV_PRAMIN_CTX_0 (0x00700000). 150 * That command is linked to the handle noted here. This handle is then used to 151 * tell the FIFO to which engine command it is connected! 152 * (CTX registers are actually a sort of RAM space.) */ 153 if (si->ps.card_arch >= NV40A) 154 { 155 /* (first set) */ 156 ACCW(HT_HANDL_00, (0x80000000 | NV10_CONTEXT_SURFACES_2D)); /* 32bit handle (not used) */ 157 ACCW(HT_VALUE_00, 0x0010114c); /* instance $114c, engine = acc engine, CHID = $00 */ 158 159 ACCW(HT_HANDL_01, (0x80000000 | NV_IMAGE_BLIT)); /* 32bit handle */ 160 ACCW(HT_VALUE_01, 0x00101148); /* instance $1146, engine = acc engine, CHID = $00 */ 161 162 ACCW(HT_HANDL_02, (0x80000000 | NV4_GDI_RECTANGLE_TEXT)); /* 32bit handle */ 163 ACCW(HT_VALUE_02, 0x0010114a); /* instance $1147, engine = acc engine, CHID = $00 */ 164 165 /* (second set) */ 166 ACCW(HT_HANDL_10, (0x80000000 | NV_ROP5_SOLID)); /* 32bit handle */ 167 ACCW(HT_VALUE_10, 0x00101142); /* instance $1142, engine = acc engine, CHID = $00 */ 168 169 ACCW(HT_HANDL_11, (0x80000000 | NV_IMAGE_BLACK_RECTANGLE)); /* 32bit handle */ 170 ACCW(HT_VALUE_11, 0x00101144); /* instance $1143, engine = acc engine, CHID = $00 */ 171 172 ACCW(HT_HANDL_12, (0x80000000 | NV_IMAGE_PATTERN)); /* 32bit handle */ 173 ACCW(HT_VALUE_12, 0x00101146); /* instance $1144, engine = acc engine, CHID = $00 */ 174 } 175 else 176 { 177 /* (first set) */ 178 ACCW(HT_HANDL_00, (0x80000000 | NV4_SURFACE)); /* 32bit handle */ 179 ACCW(HT_VALUE_00, 0x8001114c); /* instance $114c, engine = acc engine, CHID = $00 */ 180 181 ACCW(HT_HANDL_01, (0x80000000 | NV_IMAGE_BLIT)); /* 32bit handle */ 182 ACCW(HT_VALUE_01, 0x80011148); /* instance $1146, engine = acc engine, CHID = $00 */ 183 184 ACCW(HT_HANDL_02, (0x80000000 | NV4_GDI_RECTANGLE_TEXT)); /* 32bit handle */ 185 ACCW(HT_VALUE_02, 0x8001114a); /* instance $1147, engine = acc engine, CHID = $00 */ 186 187 /* (second set) */ 188 ACCW(HT_HANDL_10, (0x80000000 | NV_ROP5_SOLID)); /* 32bit handle */ 189 ACCW(HT_VALUE_10, 0x80011142); /* instance $1142, engine = acc engine, CHID = $00 */ 190 191 ACCW(HT_HANDL_11, (0x80000000 | NV_IMAGE_BLACK_RECTANGLE)); /* 32bit handle */ 192 ACCW(HT_VALUE_11, 0x80011144); /* instance $1143, engine = acc engine, CHID = $00 */ 193 194 ACCW(HT_HANDL_12, (0x80000000 | NV_IMAGE_PATTERN)); /* 32bit handle */ 195 ACCW(HT_VALUE_12, 0x80011146); /* instance $1144, engine = acc engine, CHID = $00 */ 196 } 197 198 /* program CTX registers: CTX1 is mostly done later (colorspace dependant) */ 199 /* note: 200 * CTX determines which HT handles point to what engine commands. */ 201 /* note also: 202 * CTX registers are in fact in the same GPU internal RAM space as the engine's 203 * hashtable. This means that stuff programmed in here also survives resets and 204 * power-outages! (confirmed NV11) */ 205 if (si->ps.card_arch >= NV40A) 206 { 207 /* setup a DMA define for use by command defines below. */ 208 ACCW(PR_CTX0_R, 0x00003000); /* DMA page table present and of linear type; 209 * DMA target node is NVM (non-volatile memory?) 210 * (instead of doing PCI or AGP transfers) */ 211 ACCW(PR_CTX1_R, (si->ps.memory_size - 1)); /* DMA limit: size is all cardRAM */ 212 ACCW(PR_CTX2_R, ((0x00000000 & 0xfffff000) | 0x00000002)); 213 /* DMA access type is READ_AND_WRITE; 214 * memory starts at start of cardRAM (b12-31): 215 * It's adress needs to be at a 4kb boundary! */ 216 ACCW(PR_CTX3_R, 0x00000002); /* unknown (looks like this is rubbish/not needed?) */ 217 /* setup set '0' for cmd NV_ROP5_SOLID */ 218 ACCW(PR_CTX0_0, 0x02080043); /* NVclass $043, patchcfg ROP_AND, nv10+: little endian */ 219 ACCW(PR_CTX1_0, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 220 ACCW(PR_CTX2_0, 0x00000000); /* DMA0 and DMA1 instance invalid */ 221 ACCW(PR_CTX3_0, 0x00000000); /* method traps disabled */ 222 ACCW(PR_CTX0_1, 0x00000000); /* extra */ 223 ACCW(PR_CTX1_1, 0x00000000); /* extra */ 224 /* setup set '1' for cmd NV_IMAGE_BLACK_RECTANGLE */ 225 ACCW(PR_CTX0_2, 0x02080019); /* NVclass $019, patchcfg ROP_AND, nv10+: little endian */ 226 ACCW(PR_CTX1_2, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 227 ACCW(PR_CTX2_2, 0x00000000); /* DMA0 and DMA1 instance invalid */ 228 ACCW(PR_CTX3_2, 0x00000000); /* method traps disabled */ 229 ACCW(PR_CTX0_3, 0x00000000); /* extra */ 230 ACCW(PR_CTX1_3, 0x00000000); /* extra */ 231 /* setup set '2' for cmd NV_IMAGE_PATTERN */ 232 ACCW(PR_CTX0_4, 0x02080018); /* NVclass $018, patchcfg ROP_AND, nv10+: little endian */ 233 ACCW(PR_CTX1_4, 0x02000000); /* colorspace not set, notify instance is $0200 (b16-31) */ 234 ACCW(PR_CTX2_4, 0x00000000); /* DMA0 and DMA1 instance invalid */ 235 ACCW(PR_CTX3_4, 0x00000000); /* method traps disabled */ 236 ACCW(PR_CTX0_5, 0x00000000); /* extra */ 237 ACCW(PR_CTX1_5, 0x00000000); /* extra */ 238 /* setup set '4' for cmd NV_IMAGE_BLIT */ 239 ACCW(PR_CTX0_6, 0x0208005f); /* NVclass $05f, patchcfg ROP_AND, nv10+: little endian */ 240 ACCW(PR_CTX1_6, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 241 ACCW(PR_CTX2_6, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */ 242 ACCW(PR_CTX3_6, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */ 243 ACCW(PR_CTX0_7, 0x00000000); /* extra */ 244 ACCW(PR_CTX1_7, 0x00000000); /* extra */ 245 /* setup set '5' for cmd NV4_GDI_RECTANGLE_TEXT */ 246 ACCW(PR_CTX0_8, 0x0208004a); /* NVclass $04a, patchcfg ROP_AND, nv10+: little endian */ 247 ACCW(PR_CTX1_8, 0x02000000); /* colorspace not set, notify instance is $0200 (b16-31) */ 248 ACCW(PR_CTX2_8, 0x00000000); /* DMA0 and DMA1 instance invalid */ 249 ACCW(PR_CTX3_8, 0x00000000); /* method traps disabled */ 250 ACCW(PR_CTX0_9, 0x00000000); /* extra */ 251 ACCW(PR_CTX1_9, 0x00000000); /* extra */ 252 /* setup set '6' for cmd NV10_CONTEXT_SURFACES_2D */ 253 ACCW(PR_CTX0_A, 0x02080062); /* NVclass $062, nv10+: little endian */ 254 ACCW(PR_CTX1_A, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 255 ACCW(PR_CTX2_A, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */ 256 ACCW(PR_CTX3_A, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */ 257 ACCW(PR_CTX0_B, 0x00000000); /* extra */ 258 ACCW(PR_CTX1_B, 0x00000000); /* extra */ 259 /* setup DMA set pointed at by PF_CACH1_DMAI */ 260 ACCW(PR_CTX0_C, 0x00003002); /* DMA page table present and of linear type; 261 * DMA class is $002 (b0-11); 262 * DMA target node is NVM (non-volatile memory?) 263 * (instead of doing PCI or AGP transfers) */ 264 ACCW(PR_CTX1_C, 0x00007fff); /* DMA limit: tablesize is 32k bytes */ 265 ACCW(PR_CTX2_C, (((si->ps.memory_size - 1) & 0xffff8000) | 0x00000002)); 266 /* DMA access type is READ_AND_WRITE; 267 * table is located at end of cardRAM (b12-31): 268 * It's adress needs to be at a 4kb boundary! */ 269 } 270 else 271 { 272 /* setup a DMA define for use by command defines below. */ 273 ACCW(PR_CTX0_R, 0x00003000); /* DMA page table present and of linear type; 274 * DMA target node is NVM (non-volatile memory?) 275 * (instead of doing PCI or AGP transfers) */ 276 ACCW(PR_CTX1_R, (si->ps.memory_size - 1)); /* DMA limit: size is all cardRAM */ 277 ACCW(PR_CTX2_R, ((0x00000000 & 0xfffff000) | 0x00000002)); 278 /* DMA access type is READ_AND_WRITE; 279 * memory starts at start of cardRAM (b12-31): 280 * It's adress needs to be at a 4kb boundary! */ 281 ACCW(PR_CTX3_R, 0x00000002); /* unknown (looks like this is rubbish/not needed?) */ 282 /* setup set '0' for cmd NV_ROP5_SOLID */ 283 ACCW(PR_CTX0_0, 0x01008043); /* NVclass $043, patchcfg ROP_AND, nv10+: little endian */ 284 ACCW(PR_CTX1_0, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 285 ACCW(PR_CTX2_0, 0x00000000); /* DMA0 and DMA1 instance invalid */ 286 ACCW(PR_CTX3_0, 0x00000000); /* method traps disabled */ 287 /* setup set '1' for cmd NV_IMAGE_BLACK_RECTANGLE */ 288 ACCW(PR_CTX0_2, 0x01008019); /* NVclass $019, patchcfg ROP_AND, nv10+: little endian */ 289 ACCW(PR_CTX1_2, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 290 ACCW(PR_CTX2_2, 0x00000000); /* DMA0 and DMA1 instance invalid */ 291 ACCW(PR_CTX3_2, 0x00000000); /* method traps disabled */ 292 /* setup set '2' for cmd NV_IMAGE_PATTERN */ 293 ACCW(PR_CTX0_4, 0x01008018); /* NVclass $018, patchcfg ROP_AND, nv10+: little endian */ 294 ACCW(PR_CTX1_4, 0x00000002); /* colorspace not set, notify instance is $0200 (b16-31) */ 295 ACCW(PR_CTX2_4, 0x00000000); /* DMA0 and DMA1 instance invalid */ 296 ACCW(PR_CTX3_4, 0x00000000); /* method traps disabled */ 297 /* setup set '4' for cmd NV_IMAGE_BLIT */ 298 ACCW(PR_CTX0_6, 0x0100805f); /* NVclass $05f, patchcfg ROP_AND, nv10+: little endian */ 299 ACCW(PR_CTX1_6, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 300 ACCW(PR_CTX2_6, 0x11401140); /* DMA0 instance is $1140, DMA1 instance invalid */ 301 ACCW(PR_CTX3_6, 0x00000000); /* method trap 0 is $1140, trap 1 disabled */ 302 /* setup set '5' for cmd NV4_GDI_RECTANGLE_TEXT */ 303 ACCW(PR_CTX0_8, 0x0100804a); /* NVclass $04a, patchcfg ROP_AND, nv10+: little endian */ 304 ACCW(PR_CTX1_8, 0x00000002); /* colorspace not set, notify instance is $0200 (b16-31) */ 305 ACCW(PR_CTX2_8, 0x00000000); /* DMA0 and DMA1 instance invalid */ 306 ACCW(PR_CTX3_8, 0x00000000); /* method traps disabled */ 307 /* setup set '6' for ... */ 308 if(si->ps.card_arch >= NV10A) 309 { 310 /* ... cmd NV10_CONTEXT_SURFACES_2D */ 311 ACCW(PR_CTX0_A, 0x01008062); /* NVclass $062, nv10+: little endian */ 312 } 313 else 314 { 315 /* ... cmd NV4_SURFACE */ 316 ACCW(PR_CTX0_A, 0x01008042); /* NVclass $042, nv10+: little endian */ 317 } 318 ACCW(PR_CTX1_A, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */ 319 ACCW(PR_CTX2_A, 0x11401140); /* DMA0 instance is $1140, DMA1 instance invalid */ 320 ACCW(PR_CTX3_A, 0x00000000); /* method trap 0 is $1140, trap 1 disabled */ 321 /* setup DMA set pointed at by PF_CACH1_DMAI */ 322 ACCW(PR_CTX0_C, 0x00003002); /* DMA page table present and of linear type; 323 * DMA class is $002 (b0-11); 324 * DMA target node is NVM (non-volatile memory?) 325 * (instead of doing PCI or AGP transfers) */ 326 ACCW(PR_CTX1_C, 0x00007fff); /* DMA limit: tablesize is 32k bytes */ 327 ACCW(PR_CTX2_C, (((si->ps.memory_size - 1) & 0xffff8000) | 0x00000002)); 328 /* DMA access type is READ_AND_WRITE; 329 * table is located at end of cardRAM (b12-31): 330 * It's adress needs to be at a 4kb boundary! */ 331 } 332 333 if (si->ps.card_arch == NV04A) 334 { 335 /* 336 if((pNv->Chipset & 0x0fff) == 0x0020) 337 { 338 pNv->PRAMIN[0x0824] |= 0x00020000; 339 pNv->PRAMIN[0x0826] += pNv->FbAddress; 340 } 341 pNv->PGRAPH[0x0080/4] = 0x000001FF;//acc DEBUG0 342 pNv->PGRAPH[0x0080/4] = 0x1230C000; 343 pNv->PGRAPH[0x0084/4] = 0x72111101; 344 pNv->PGRAPH[0x0088/4] = 0x11D5F071; 345 pNv->PGRAPH[0x008C/4] = 0x0004FF31; 346 pNv->PGRAPH[0x008C/4] = 0x4004FF31; 347 348 pNv->PGRAPH[0x0140/4] = 0x00000000; 349 pNv->PGRAPH[0x0100/4] = 0xFFFFFFFF; 350 pNv->PGRAPH[0x0170/4] = 0x10010100; 351 pNv->PGRAPH[0x0710/4] = 0xFFFFFFFF; 352 pNv->PGRAPH[0x0720/4] = 0x00000001; 353 354 pNv->PGRAPH[0x0810/4] = 0x00000000; 355 pNv->PGRAPH[0x0608/4] = 0xFFFFFFFF; 356 */ 357 } 358 else 359 { 360 /* do a explicit engine reset */ 361 ACCW(DEBUG0, 0xffffffff); 362 ACCW(DEBUG0, 0x00000000); 363 /* disable all acceleration engine INT reguests */ 364 ACCW(ACC_INTE, 0x00000000); 365 /* reset all acceration engine INT status bits */ 366 ACCW(ACC_INTS, 0xffffffff); 367 /* context control enabled */ 368 ACCW(NV10_CTX_CTRL, 0x10010100); 369 /* all acceleration buffers, pitches and colors are valid */ 370 ACCW(NV10_ACC_STAT, 0xffffffff); 371 /* enable acceleration engine command FIFO */ 372 ACCW(FIFO_EN, 0x00000001); 373 /* setup surface type */ 374 ACCW(NV10_SURF_TYP, ((ACCR(NV10_SURF_TYP)) & 0x0007ff00)); 375 ACCW(NV10_SURF_TYP, ((ACCR(NV10_SURF_TYP)) | 0x00020100)); 376 } 377 378 if (si->ps.card_arch == NV10A) 379 { 380 /* init some function blocks */ 381 ACCW(DEBUG1, 0x00118700); 382 ACCW(DEBUG2, 0x24e00810); 383 ACCW(DEBUG3, 0x55de0030); 384 385 /* copy tile setup stuff from 'source' to acc engine */ 386 for (cnt = 0; cnt < 32; cnt++) 387 { 388 NV_REG32(NVACC_NV10_TIL0AD + (cnt << 2)) = 389 NV_REG32(NVACC_NV10_FBTIL0AD + (cnt << 2)); 390 } 391 392 /* setup location of active screen in framebuffer */ 393 ACCW(OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 394 ACCW(OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 395 /* setup accesible card memory range */ 396 ACCW(BLIMIT0, (si->ps.memory_size - 1)); 397 ACCW(BLIMIT1, (si->ps.memory_size - 1)); 398 399 /* pattern shape value = 8x8, 2 color */ 400 //fixme: setting this here means that we don't need to provide the acc 401 //commands with it. But have other architectures this pre-programmed 402 //explicitly??? I don't think so! 403 ACCW(PAT_SHP, 0x00000000); 404 /* Pgraph Beta AND value (fraction) b23-30 */ 405 ACCW(BETA_AND_VAL, 0xffffffff); 406 } 407 408 if (si->ps.card_arch >= NV20A) 409 { 410 switch (si->ps.card_arch) 411 { 412 case NV40A: 413 /* init some function blocks */ 414 ACCW(DEBUG1, 0x401287c0); 415 ACCW(DEBUG3, 0x60de8051); 416 /* disable specific functions, but enable SETUP_SPARE2 register */ 417 ACCW(NV10_DEBUG4, 0x00008000); 418 /* set limit_viol_pix_adress(?): more likely something unknown.. */ 419 ACCW(NV25_WHAT0, 0x00be3c5f); 420 421 /* unknown.. */ 422 switch (si->ps.card_type) 423 { 424 case NV40: 425 ACCW(NV40_WHAT0, 0x83280fff); 426 ACCW(NV40_WHAT1, 0x000000a0); 427 ACCW(NV40_WHAT2, 0x0078e366); 428 ACCW(NV40_WHAT3, 0x0000014c); 429 // pNv->PFB[0x033C/4] &= 0xffff7fff;//0x00100000 :<<<< NV_PFB_CLOSE_PAGE2, bits unknown 430 break; 431 case NV41: 432 ACCW(NV40P_WHAT0, 0x83280eff); 433 ACCW(NV40P_WHAT1, 0x000000a0); 434 ACCW(NV40P_WHAT2, 0x007596ff); 435 ACCW(NV40P_WHAT3, 0x00000108); 436 break; 437 case NV43: 438 ACCW(NV40P_WHAT0, 0x83280eff); 439 ACCW(NV40P_WHAT1, 0x000000a0); 440 ACCW(NV40P_WHAT2, 0x0072cb77); 441 ACCW(NV40P_WHAT3, 0x00000108); 442 break; 443 case NV45: //fixme, checkout: this is cardID 0x016x at least! 444 ACCW(NV40P_WHAT0, 0x83280eff); 445 ACCW(NV40P_WHAT1, 0x000000a0); 446 447 NV_REG32(NV32_NV45_WHAT10) = NV_REG32(NV32_NV10STRAPINFO); 448 NV_REG32(NV32_NV45_WHAT11) = 0x00000000; 449 NV_REG32(NV32_NV45_WHAT12) = 0x00000000; 450 NV_REG32(NV32_NV45_WHAT13) = NV_REG32(NV32_NV10STRAPINFO); 451 452 ACCW(NV45_WHAT2, 0x00000000); 453 ACCW(NV45_WHAT3, 0x00000000); 454 //schakelt screrm signaal uit op NV43, maar timing blijft werken<<<<<<<< 455 // pNv->PRAMDAC[0x0608/4] |= 0x00100000;//0x00680608==NVDAC_TSTCTRL haiku 456 //b20=1=DACTM_TEST ON (termination?) 457 //how about: NVDAC2_TSTCTRL???? 458 break; 459 default: 460 ACCW(NV40P_WHAT0, 0x83280eff); 461 ACCW(NV40P_WHAT1, 0x000000a0); 462 break; 463 } 464 465 ACCW(NV10_TIL3PT, 0x2ffff800); 466 ACCW(NV10_TIL3ST, 0x00006000); 467 ACCW(NV4X_WHAT1, 0x01000000); 468 /* engine data source DMA instance = $1140 */ 469 ACCW(NV4X_DMA_SRC, 0x00001140); 470 break; 471 case NV30A: 472 /* init some function blocks, but most is unknown.. */ 473 ACCW(DEBUG1, 0x40108700); 474 ACCW(NV25_WHAT1, 0x00140000); 475 ACCW(DEBUG3, 0xf00e0431); 476 ACCW(NV10_DEBUG4, 0x00008000); 477 ACCW(NV25_WHAT0, 0xf04b1f36); 478 ACCW(NV20_WHAT3, 0x1002d888); 479 ACCW(NV25_WHAT2, 0x62ff007f); 480 break; 481 case NV20A: 482 /* init some function blocks, but most is unknown.. */ 483 ACCW(DEBUG1, 0x00118700); 484 ACCW(DEBUG3, 0xf20e0431); 485 ACCW(NV10_DEBUG4, 0x00000000); 486 ACCW(NV20_WHAT1, 0x00000040); 487 if (si->ps.card_type < NV25) 488 { 489 ACCW(NV20_WHAT2, 0x00080000); 490 ACCW(NV10_DEBUG5, 0x00000005); 491 ACCW(NV20_WHAT3, 0x45caa208); 492 ACCW(NV20_WHAT4, 0x24000000); 493 ACCW(NV20_WHAT5, 0x00000040); 494 495 /* copy some fixed RAM(?) configuration info(?) to some indexed registers: */ 496 /* b16-24 is select; b2-13 is adress in 32-bit words */ 497 ACCW(RDI_INDEX, 0x00e00038); 498 /* data is 32-bit */ 499 ACCW(RDI_DATA, 0x00000030); 500 /* copy some fixed RAM(?) configuration info(?) to some indexed registers: */ 501 /* b16-24 is select; b2-13 is adress in 32-bit words */ 502 ACCW(RDI_INDEX, 0x00e10038); 503 /* data is 32-bit */ 504 ACCW(RDI_DATA, 0x00000030); 505 } 506 else 507 { 508 ACCW(NV25_WHAT1, 0x00080000); 509 ACCW(NV25_WHAT0, 0x304b1fb6); 510 ACCW(NV20_WHAT3, 0x18b82880); 511 ACCW(NV20_WHAT4, 0x44000000); 512 ACCW(NV20_WHAT5, 0x40000080); 513 ACCW(NV25_WHAT2, 0x000000ff); 514 } 515 break; 516 } 517 518 /* NV20A, NV30A and NV40A: */ 519 /* copy tile setup stuff from 'source' to acc engine (pattern colorRAM?) */ 520 for (cnt = 0; cnt < 32; cnt++) 521 { 522 NV_REG32(NVACC_NV20_WHAT0 + (cnt << 2)) = 523 NV_REG32(NVACC_NV10_FBTIL0AD + (cnt << 2)); 524 } 525 526 if (si->ps.card_arch >= NV40A) 527 { 528 if (si->ps.card_type == NV40) 529 { 530 /* copy some RAM configuration info(?) */ 531 ACCW(NV20_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); 532 ACCW(NV20_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); 533 ACCW(NV40_WHAT_T2, NV_REG32(NV32_PFB_CONFIG_0)); 534 ACCW(NV40_WHAT_T3, NV_REG32(NV32_PFB_CONFIG_1)); 535 536 /* setup location of active screen in framebuffer */ 537 ACCW(NV20_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 538 ACCW(NV20_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 539 /* setup accesible card memory range */ 540 ACCW(NV20_BLIMIT6, (si->ps.memory_size - 1)); 541 ACCW(NV20_BLIMIT7, (si->ps.memory_size - 1)); 542 } 543 else 544 { 545 /* copy some RAM configuration info(?) */ 546 ACCW(NV40P_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); 547 ACCW(NV40P_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); 548 ACCW(NV40P_WHAT_T2, NV_REG32(NV32_PFB_CONFIG_0)); 549 ACCW(NV40P_WHAT_T3, NV_REG32(NV32_PFB_CONFIG_1)); 550 551 /* setup location of active screen in framebuffer */ 552 ACCW(NV40P_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 553 ACCW(NV40P_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 554 /* setup accesible card memory range */ 555 ACCW(NV40P_BLIMIT6, (si->ps.memory_size - 1)); 556 ACCW(NV40P_BLIMIT7, (si->ps.memory_size - 1)); 557 } 558 } 559 else /* NV20A and NV30A: */ 560 { 561 /* copy some RAM configuration info(?) */ 562 ACCW(NV20_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0)); 563 ACCW(NV20_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1)); 564 /* copy some RAM configuration info(?) to some indexed registers: */ 565 /* b16-24 is select; b2-13 is adress in 32-bit words */ 566 ACCW(RDI_INDEX, 0x00ea0000); 567 /* data is 32-bit */ 568 ACCW(RDI_DATA, NV_REG32(NV32_PFB_CONFIG_0)); 569 /* b16-24 is select; b2-13 is adress in 32-bit words */ 570 ACCW(RDI_INDEX, 0x00ea0004); 571 /* data is 32-bit */ 572 ACCW(RDI_DATA, NV_REG32(NV32_PFB_CONFIG_1)); 573 574 /* setup location of active screen in framebuffer */ 575 ACCW(NV20_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 576 ACCW(NV20_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)); 577 /* setup accesible card memory range */ 578 ACCW(NV20_BLIMIT6, (si->ps.memory_size - 1)); 579 ACCW(NV20_BLIMIT7, (si->ps.memory_size - 1)); 580 } 581 582 /* NV20A, NV30A and NV40A: */ 583 /* setup some acc engine tile stuff */ 584 ACCW(NV10_TIL2AD, 0x00000000); 585 ACCW(NV10_TIL0ED, 0xffffffff); 586 } 587 588 /* all cards: */ 589 /* setup clipping: rect size is 32768 x 32768, probably max. setting */ 590 /* note: 591 * can also be done via the NV_IMAGE_BLACK_RECTANGLE engine command. */ 592 ACCW(ABS_UCLP_XMIN, 0x00000000); 593 ACCW(ABS_UCLP_YMIN, 0x00000000); 594 ACCW(ABS_UCLP_XMAX, 0x00007fff); 595 ACCW(ABS_UCLP_YMAX, 0x00007fff); 596 597 /*** PFIFO ***/ 598 /* (setup caches) */ 599 /* disable caches reassign */ 600 ACCW(PF_CACHES, 0x00000000); 601 /* PFIFO mode: channel 0 is in DMA mode, channels 1 - 32 are in PIO mode */ 602 ACCW(PF_MODE, 0x00000001); 603 /* cache1 push0 access disabled */ 604 ACCW(PF_CACH1_PSH0, 0x00000000); 605 /* cache1 pull0 access disabled */ 606 ACCW(PF_CACH1_PUL0, 0x00000000); 607 /* cache1 push1 mode = DMA */ 608 if (si->ps.card_arch >= NV40A) 609 ACCW(PF_CACH1_PSH1, 0x00010000); 610 else 611 ACCW(PF_CACH1_PSH1, 0x00000100); 612 /* cache1 DMA Put offset = 0 (b2-28) */ 613 ACCW(PF_CACH1_DMAP, 0x00000000); 614 /* cache1 DMA Get offset = 0 (b2-28) */ 615 ACCW(PF_CACH1_DMAG, 0x00000000); 616 /* cache1 DMA instance adress = $114e (b0-15); 617 * instance being b4-19 with baseadress NV_PRAMIN_CTX_0 (0x00700000). */ 618 /* note: 619 * should point to a DMA definition in CTX register space (which is sort of RAM). 620 * This define tells the engine where the DMA cmd buffer is and what it's size is. 621 * Inside that cmd buffer you'll find the actual issued engine commands. */ 622 ACCW(PF_CACH1_DMAI, 0x0000114e); 623 /* cache0 push0 access disabled */ 624 ACCW(PF_CACH0_PSH0, 0x00000000); 625 /* cache0 pull0 access disabled */ 626 ACCW(PF_CACH0_PUL0, 0x00000000); 627 /* RAM HT (hash table) baseadress = $10000 (b4-8), size = 4k, 628 * search = 128 (is byte offset between hash 'sets') */ 629 /* note: 630 * so HT base is $00710000, last is $00710fff. 631 * In this space you define the engine command handles (HT_HANDL_XX), which 632 * in turn points to the defines in CTX register space (which is sort of RAM) */ 633 ACCW(PF_RAMHT, 0x03000100); 634 /* RAM FC baseadress = $11000 (b3-8) (size is fixed to 0.5k(?)) */ 635 /* note: 636 * so FC base is $00711000, last is $007111ff. (not used?) */ 637 ACCW(PF_RAMFC, 0x00000110); 638 /* RAM RO baseadress = $11200 (b1-8), size = 0.5k */ 639 /* note: 640 * so RO base is $00711200, last is $007113ff. (not used?) */ 641 /* note also: 642 * This means(?) the PRAMIN CTX registers are accessible from base $00711400. */ 643 ACCW(PF_RAMRO, 0x00000112); 644 /* PFIFO size: ch0-15 = 512 bytes, ch16-31 = 124 bytes */ 645 ACCW(PF_SIZE, 0x0000ffff); 646 /* cache1 hash instance = $ffff (b0-15) */ 647 ACCW(PF_CACH1_HASH, 0x0000ffff); 648 /* disable all PFIFO INTs */ 649 ACCW(PF_INTEN, 0x00000000); 650 /* reset all PFIFO INT status bits */ 651 ACCW(PF_INTSTAT, 0xffffffff); 652 /* cache0 pull0 engine = acceleration engine (graphics) */ 653 ACCW(PF_CACH0_PUL1, 0x00000001); 654 /* cache1 DMA control: disable some stuff */ 655 ACCW(PF_CACH1_DMAC, 0x00000000); 656 /* cache1 engine 0 upto/including 7 is software (could also be graphics or DVD) */ 657 ACCW(PF_CACH1_ENG, 0x00000000); 658 /* cache1 DMA fetch: trigger at 128 bytes, size is 32 bytes, max requests is 15, 659 * use little endian */ 660 ACCW(PF_CACH1_DMAF, 0x000f0078); 661 /* cache1 DMA push: b0 = 1: access is enabled */ 662 ACCW(PF_CACH1_DMAS, 0x00000001); 663 /* cache1 push0 access enabled */ 664 ACCW(PF_CACH1_PSH0, 0x00000001); 665 /* cache1 pull0 access enabled */ 666 ACCW(PF_CACH1_PUL0, 0x00000001); 667 /* cache1 pull1 engine = acceleration engine (graphics) */ 668 ACCW(PF_CACH1_PUL1, 0x00000001); 669 /* enable PFIFO caches reassign */ 670 ACCW(PF_CACHES, 0x00000001); 671 672 /*** init acceleration engine command info ***/ 673 /* set object handles */ 674 /* note: 675 * probably depending on some other setup, there are 8 or 32 FIFO channels 676 * available. Assuming the current setup only has 8 channels because the 'rest' 677 * isn't setup here... */ 678 si->engine.fifo.handle[0] = NV_ROP5_SOLID; 679 si->engine.fifo.handle[1] = NV_IMAGE_BLACK_RECTANGLE; 680 si->engine.fifo.handle[2] = NV_IMAGE_PATTERN; 681 si->engine.fifo.handle[3] = NV4_SURFACE; /* NV10_CONTEXT_SURFACES_2D is identical */ 682 si->engine.fifo.handle[4] = NV_IMAGE_BLIT; 683 si->engine.fifo.handle[5] = NV4_GDI_RECTANGLE_TEXT; 684 si->engine.fifo.handle[6] = NV1_RENDER_SOLID_LIN; 685 si->engine.fifo.handle[7] = NV4_DX5_TEXTURE_TRIANGLE; 686 /* preset no FIFO channels assigned to cmd's */ 687 for (cnt = 0; cnt < 0x20; cnt++) 688 { 689 si->engine.fifo.ch_ptr[cnt] = 0; 690 } 691 /* set handle's pointers to their assigned FIFO channels */ 692 /* note: 693 * b0-1 aren't used as adressbits. Using b0 to indicate a valid pointer. */ 694 for (cnt = 0; cnt < 0x08; cnt++) 695 { 696 si->engine.fifo.ch_ptr[(si->engine.fifo.handle[cnt])] = 697 (0x00000001 + (cnt * 0x00002000)); 698 } 699 700 /*** init DMA command buffer info ***/ 701 si->engine.dma.cmdbuffer = (uint32 *)((char *)si->framebuffer + 702 ((si->ps.memory_size - 1) & 0xffff8000)); 703 LOG(4,("ACC_DMA: command buffer is at adress $%08x\n", 704 ((uint32)(si->engine.dma.cmdbuffer)))); 705 /* we have issued no DMA cmd's to the engine yet */ 706 si->engine.dma.put = 0; 707 /* the current first free adress in the DMA buffer is at offset 0 */ 708 si->engine.dma.current = 0; 709 /* the DMA buffer can hold 8k 32-bit words (it's 32kb in size) */ 710 /* note: 711 * one word is reserved at the end of the DMA buffer to be able to instruct the 712 * engine to do a buffer wrap-around! 713 * (DMA opcode 'noninc method': issue word $20000000.) */ 714 si->engine.dma.max = 8192 - 1; 715 /* note the current free space we have left in the DMA buffer */ 716 si->engine.dma.free = si->engine.dma.max - si->engine.dma.current; 717 718 /*** init FIFO via DMA command buffer. ***/ 719 /* wait for room in fifo for new FIFO assigment cmds if needed: */ 720 //fixme if CH6 and CH7 are assigned.. 721 // if (nv_acc_fifofree_dma(16) != B_OK) return B_ERROR; 722 if (nv_acc_fifofree_dma(12) != B_OK) return B_ERROR; 723 724 /* program new FIFO assignments */ 725 /* Raster OPeration: */ 726 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH0, si->engine.fifo.handle[0]); 727 /* Clip: */ 728 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH1, si->engine.fifo.handle[1]); 729 /* Pattern: */ 730 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH2, si->engine.fifo.handle[2]); 731 /* 2D Surface: */ 732 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH3, si->engine.fifo.handle[3]); 733 /* Blit: */ 734 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH4, si->engine.fifo.handle[4]); 735 /* Bitmap: */ 736 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH5, si->engine.fifo.handle[5]); 737 /* Line: (not used or 3D only?) */ 738 //fixme.. 739 // nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH6, si->engine.fifo.handle[6]); 740 /* Textured Triangle: (3D only) */ 741 //fixme.. 742 // nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH7, si->engine.fifo.handle[7]); 743 744 /*** Set pixel width ***/ 745 switch(si->dm.space) 746 { 747 case B_CMAP8: 748 surf_depth = 0x00000001; 749 cmd_depth = 0x00000003; 750 break; 751 case B_RGB15_LITTLE: 752 case B_RGB16_LITTLE: 753 surf_depth = 0x00000004; 754 cmd_depth = 0x00000001; 755 break; 756 case B_RGB32_LITTLE: 757 case B_RGBA32_LITTLE: 758 surf_depth = 0x00000006; 759 cmd_depth = 0x00000003; 760 break; 761 default: 762 LOG(8,("ACC_DMA: init, invalid bit depth\n")); 763 return B_ERROR; 764 } 765 766 /* wait for room in fifo for surface setup cmd if needed */ 767 if (nv_acc_fifofree_dma(5) != B_OK) return B_ERROR; 768 /* now setup 2D surface (writing 5 32bit words) */ 769 nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 4); 770 si->engine.dma.cmdbuffer[si->engine.dma.current++] = surf_depth; /* Format */ 771 /* setup screen pitch */ 772 si->engine.dma.cmdbuffer[si->engine.dma.current++] = 773 ((si->fbc.bytes_per_row & 0x0000ffff) | (si->fbc.bytes_per_row << 16)); /* Pitch */ 774 /* setup screen location */ 775 si->engine.dma.cmdbuffer[si->engine.dma.current++] = 776 ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer); /* OffsetSource */ 777 si->engine.dma.cmdbuffer[si->engine.dma.current++] = 778 ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer); /* OffsetDest */ 779 780 /* wait for room in fifo for pattern colordepth setup cmd if needed */ 781 if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR; 782 /* set pattern colordepth (writing 2 32bit words) */ 783 nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETCOLORFORMAT, 1); 784 si->engine.dma.cmdbuffer[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 785 786 /* wait for room in fifo for bitmap colordepth setup cmd if needed */ 787 if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR; 788 /* set bitmap colordepth (writing 2 32bit words) */ 789 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_SETCOLORFORMAT, 1); 790 si->engine.dma.cmdbuffer[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */ 791 792 /* tell the engine to fetch and execute all (new) commands in the DMA buffer */ 793 nv_start_dma(); 794 795 return B_OK; 796 } 797 798 static void nv_start_dma(void) 799 { 800 uint8 dummy; 801 802 if (si->engine.dma.current != si->engine.dma.put) 803 { 804 si->engine.dma.put = si->engine.dma.current; 805 /* dummy read the first adress of the framebuffer: flushes MTRR-WC buffers so 806 * we know for sure the DMA command buffer received all data. */ 807 dummy = *((char *)(si->framebuffer)); 808 /* actually start DMA to execute all commands now in buffer */ 809 /* note: 810 * it doesn't matter which FIFO channel's DMA registers we access, they are in 811 * fact all the same set. It also doesn't matter if the channel was assigned a 812 * command or not. */ 813 /* note also: 814 * NV_GENERAL_DMAPUT is a write-only register on some cards (confirmed NV11). */ 815 NV_REG32(NVACC_FIFO + NV_GENERAL_DMAPUT) = (si->engine.dma.put << 2); 816 } 817 } 818 819 /* this routine does not check the engine's internal hardware FIFO, but the DMA 820 * command buffer. You can see this as a FIFO as well, that feeds the hardware FIFO. 821 * The hardware FIFO state is checked by the DMA hardware automatically. */ 822 static status_t nv_acc_fifofree_dma(uint16 cmd_size) 823 { 824 uint32 dmaget; 825 826 /* we'd better check for timeouts on the DMA engine as it's theoretically 827 * breakable by malfunctioning software */ 828 uint16 cnt = 0; 829 830 /* check if the DMA buffer has enough room for the command. 831 * note: 832 * engine.dma.free is 'cached' */ 833 while ((si->engine.dma.free < cmd_size) && (cnt < 10000) && (err < 3)) 834 { 835 /* see where the engine is currently fetching from the buffer */ 836 /* note: 837 * read this only once in the code as accessing registers is relatively slow */ 838 /* note also: 839 * it doesn't matter which FIFO channel's DMA registers we access, they are in 840 * fact all the same set. It also doesn't matter if the channel was assigned a 841 * command or not. */ 842 dmaget = ((NV_REG32(NVACC_FIFO + NV_GENERAL_DMAGET)) >> 2); 843 844 /* update timeout counter: on NV11 on a Pentium4 2.8Ghz max reached count 845 * using BeRoMeter 1.2.6 was about 600; so counting 10000 before generating 846 * a timeout should definately do it. Snooze()-ing cannot be done without a 847 * serious speed penalty, even if done for only 1 microSecond. */ 848 cnt++; 849 850 /* where's the engine fetching viewed from us issuing? */ 851 if (si->engine.dma.put >= dmaget) 852 { 853 /* engine is fetching 'behind us', the last piece of the buffer is free */ 854 855 /* note the 'updated' free space we have in the DMA buffer */ 856 si->engine.dma.free = si->engine.dma.max - si->engine.dma.current; 857 /* if it's enough after all we exit this routine immediately. Else: */ 858 if (si->engine.dma.free < cmd_size) 859 { 860 /* not enough room left, so instruct DMA engine to reset the buffer 861 * when it's reaching the end of it */ 862 si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0x20000000; 863 /* reset our buffer pointer, so new commands will be placed at the 864 * beginning of the buffer. */ 865 si->engine.dma.current = 0; 866 /* tell the engine to fetch the remaining command(s) in the DMA buffer 867 * that where not executed before. */ 868 nv_start_dma(); 869 870 /* NOW the engine is fetching 'in front of us', so the first piece 871 * of the buffer is free */ 872 873 /* note the updated current free space we have in the DMA buffer */ 874 si->engine.dma.free = dmaget - si->engine.dma.current; 875 /* mind this pittfall: 876 * Leave some room between where the engine is fetching and where we 877 * put new commands. Otherwise the engine will crash on heavy loads. 878 * A crash can be forced best in 640x480x32 mode with BeRoMeter 1.2.6. 879 * (confirmed on NV11 and NV43 with less than 256 words forced freespace.) 880 * Note: 881 * The engine is DMA triggered for fetching chunks every 128 bytes, 882 * maybe this is the reason for this behaviour. 883 * Note also: 884 * it looks like the space that needs to be kept free is coupled 885 * with the size of the DMA buffer. */ 886 if (si->engine.dma.free < 256) 887 si->engine.dma.free = 0; 888 else 889 si->engine.dma.free -= 256; 890 } 891 } 892 else 893 { 894 /* engine is fetching 'in front of us', so the first piece of the buffer 895 * is free */ 896 897 /* note the updated current free space we have in the DMA buffer */ 898 si->engine.dma.free = dmaget - si->engine.dma.current; 899 /* mind this pittfall: 900 * Leave some room between where the engine is fetching and where we 901 * put new commands. Otherwise the engine will crash on heavy loads. 902 * A crash can be forced best in 640x480x32 mode with BeRoMeter 1.2.6. 903 * (confirmed on NV11 and NV43 with less than 256 words forced freespace.) 904 * Note: 905 * The engine is DMA triggered for fetching chunks every 128 bytes, 906 * maybe this is the reason for this behaviour. 907 * Note also: 908 * it looks like the space that needs to be kept free is coupled 909 * with the size of the DMA buffer. */ 910 if (si->engine.dma.free < 256) 911 si->engine.dma.free = 0; 912 else 913 si->engine.dma.free -= 256; 914 } 915 } 916 917 /* log timeout if we had one */ 918 if (cnt == 10000) 919 { 920 if (err < 3) err++; 921 LOG(4,("ACC_DMA: fifofree; DMA timeout #%d, engine trouble!\n", err)); 922 } 923 924 /* we must make the acceleration routines abort or the driver will hang! */ 925 if (err >= 3) return B_ERROR; 926 927 return B_OK; 928 } 929 930 static void nv_acc_cmd_dma(uint32 cmd, uint16 offset, uint16 size) 931 { 932 /* NV_FIFO_DMA_OPCODE: set number of cmd words (b18 - 28); set FIFO offset for 933 * first cmd word (b2 - 15); set DMA opcode = method (b29 - 31). 934 * a 'NOP' is the opcode word $00000000. */ 935 /* note: 936 * possible DMA opcodes: 937 * b'000' is 'method' (execute cmd); 938 * b'001' is 'jump'; 939 * b'002' is 'noninc method' (execute buffer wrap-around); 940 * b'003' is 'call': return is executed by opcode word $00020000 (b17 = 1). */ 941 /* note also: 942 * this system uses auto-increments for the FIFO offset adresses. Make sure 943 * to set a new adress if a gap exists between the previous one and the new one. */ 944 si->engine.dma.cmdbuffer[si->engine.dma.current++] = ((size << 18) | 945 ((si->engine.fifo.ch_ptr[cmd] + offset) & 0x0000fffc)); 946 947 /* space left after issuing the current command is the cmd AND it's arguments less */ 948 si->engine.dma.free -= (size + 1); 949 } 950 951 static void nv_acc_set_ch_dma(uint16 ch, uint32 handle) 952 { 953 /* issue FIFO channel assign cmd */ 954 si->engine.dma.cmdbuffer[si->engine.dma.current++] = ((1 << 18) | ch); 955 /* set new assignment */ 956 si->engine.dma.cmdbuffer[si->engine.dma.current++] = (0x80000000 | handle); 957 958 /* space left after issuing the current command is the cmd AND it's arguments less */ 959 si->engine.dma.free -= 2; 960 } 961 962 /* fixme? (check this out..) 963 * Looks like this stuff can be very much simplified and speed-up, as it seems it's not 964 * nessesary to wait for the engine to become idle before re-assigning channels. 965 * Because the cmd handles are actually programmed _inside_ the fifo channels, it might 966 * well be that the assignment is buffered along with the commands that still have to 967 * be executed! 968 * (sounds very plausible to me :) */ 969 void nv_acc_assert_fifo_dma(void) 970 { 971 /* does every engine cmd this accelerant needs have a FIFO channel? */ 972 //fixme: can probably be optimized for both speed and channel selection... 973 if (!si->engine.fifo.ch_ptr[NV_ROP5_SOLID] || 974 !si->engine.fifo.ch_ptr[NV_IMAGE_BLACK_RECTANGLE] || 975 !si->engine.fifo.ch_ptr[NV_IMAGE_PATTERN] || 976 !si->engine.fifo.ch_ptr[NV4_SURFACE] || 977 !si->engine.fifo.ch_ptr[NV_IMAGE_BLIT] || 978 !si->engine.fifo.ch_ptr[NV4_GDI_RECTANGLE_TEXT]) 979 { 980 uint16 cnt; 981 982 /* no, wait until the engine is idle before re-assigning the FIFO */ 983 nv_acc_wait_idle_dma(); 984 985 /* free the FIFO channels we want from the currently assigned cmd's */ 986 si->engine.fifo.ch_ptr[si->engine.fifo.handle[0]] = 0; 987 si->engine.fifo.ch_ptr[si->engine.fifo.handle[1]] = 0; 988 si->engine.fifo.ch_ptr[si->engine.fifo.handle[2]] = 0; 989 si->engine.fifo.ch_ptr[si->engine.fifo.handle[3]] = 0; 990 si->engine.fifo.ch_ptr[si->engine.fifo.handle[4]] = 0; 991 si->engine.fifo.ch_ptr[si->engine.fifo.handle[5]] = 0; 992 993 /* set new object handles */ 994 si->engine.fifo.handle[0] = NV_ROP5_SOLID; 995 si->engine.fifo.handle[1] = NV_IMAGE_BLACK_RECTANGLE; 996 si->engine.fifo.handle[2] = NV_IMAGE_PATTERN; 997 si->engine.fifo.handle[3] = NV4_SURFACE; 998 si->engine.fifo.handle[4] = NV_IMAGE_BLIT; 999 si->engine.fifo.handle[5] = NV4_GDI_RECTANGLE_TEXT; 1000 1001 /* set handle's pointers to their assigned FIFO channels */ 1002 /* note: 1003 * b0-1 aren't used as adressbits. Using b0 to indicate a valid pointer. */ 1004 for (cnt = 0; cnt < 0x08; cnt++) 1005 { 1006 si->engine.fifo.ch_ptr[(si->engine.fifo.handle[cnt])] = 1007 (0x00000001 + (cnt * 0x00002000)); 1008 } 1009 1010 /* wait for room in fifo for new FIFO assigment cmds if needed. */ 1011 if (nv_acc_fifofree_dma(12) != B_OK) return; 1012 1013 /* program new FIFO assignments */ 1014 /* Raster OPeration: */ 1015 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH0, si->engine.fifo.handle[0]); 1016 /* Clip: */ 1017 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH1, si->engine.fifo.handle[1]); 1018 /* Pattern: */ 1019 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH2, si->engine.fifo.handle[2]); 1020 /* 2D Surface: */ 1021 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH3, si->engine.fifo.handle[3]); 1022 /* Blit: */ 1023 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH4, si->engine.fifo.handle[4]); 1024 /* Bitmap: */ 1025 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH5, si->engine.fifo.handle[5]); 1026 1027 /* tell the engine to fetch and execute all (new) commands in the DMA buffer */ 1028 nv_start_dma(); 1029 } 1030 } 1031 1032 /* screen to screen blit - i.e. move windows around and scroll within them. */ 1033 status_t nv_acc_setup_blit_dma() 1034 { 1035 /* setup solid pattern: 1036 * wait for room in fifo for pattern cmd if needed. */ 1037 if (nv_acc_fifofree_dma(7) != B_OK) return B_ERROR; 1038 /* now setup pattern (writing 7 32bit words) */ 1039 nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETSHAPE, 1); 1040 si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0x00000000; /* SetShape: 0 = 8x8, 1 = 64x1, 2 = 1x64 */ 1041 nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETCOLOR0, 4); 1042 si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetColor0 */ 1043 si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetColor1 */ 1044 si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetPattern[0] */ 1045 si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetPattern[1] */ 1046 /* ROP registers (Raster OPeration): 1047 * wait for room in fifo for ROP cmd if needed. */ 1048 if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR; 1049 1050 /* now setup ROP (writing 2 32bit words) for GXcopy */ 1051 nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); 1052 si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xcc; /* SetRop5 */ 1053 1054 return B_OK; 1055 } 1056 1057 status_t nv_acc_blit_dma(uint16 xs,uint16 ys,uint16 xd,uint16 yd,uint16 w,uint16 h) 1058 { 1059 /* Note: blit-copy direction is determined inside riva hardware: no setup needed */ 1060 1061 /* instruct engine what to blit: 1062 * wait for room in fifo for blit cmd if needed. */ 1063 if (nv_acc_fifofree_dma(4) != B_OK) return B_ERROR; 1064 /* now setup blit (writing 4 32bit words) */ 1065 nv_acc_cmd_dma(NV_IMAGE_BLIT, NV_IMAGE_BLIT_SOURCEORG, 3); 1066 si->engine.dma.cmdbuffer[si->engine.dma.current++] = ((ys << 16) | xs); /* SourceOrg */ 1067 si->engine.dma.cmdbuffer[si->engine.dma.current++] = ((yd << 16) | xd); /* DestOrg */ 1068 si->engine.dma.cmdbuffer[si->engine.dma.current++] = (((h + 1) << 16) | (w + 1)); /* HeightWidth */ 1069 1070 /* tell the engine to fetch the commands in the DMA buffer that where not 1071 * executed before. At this time the setup done by nv_acc_setup_blit_dma() is 1072 * also executed on the first call of nv_acc_blit_dma(). */ 1073 nv_start_dma(); 1074 1075 return B_OK; 1076 } 1077 1078 /* rectangle fill - i.e. workspace and window background color */ 1079 /* span fill - i.e. (selected) menuitem background color (Dano) */ 1080 status_t nv_acc_setup_rectangle_dma(uint32 color) 1081 { 1082 /* setup solid pattern: 1083 * wait for room in fifo for pattern cmd if needed. */ 1084 if (nv_acc_fifofree_dma(7) != B_OK) return B_ERROR; 1085 /* now setup pattern (writing 7 32bit words) */ 1086 nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETSHAPE, 1); 1087 si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0x00000000; /* SetShape: 0 = 8x8, 1 = 64x1, 2 = 1x64 */ 1088 nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETCOLOR0, 4); 1089 si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetColor0 */ 1090 si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetColor1 */ 1091 si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetPattern[0] */ 1092 si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetPattern[1] */ 1093 1094 /* ROP registers (Raster OPeration): 1095 * wait for room in fifo for ROP cmd if needed. */ 1096 if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR; 1097 /* now setup ROP (writing 2 32bit words) for GXcopy */ 1098 nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); 1099 si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xcc; /* SetRop5 */ 1100 1101 /* setup fill color: 1102 * wait for room in fifo for bitmap cmd if needed. */ 1103 if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR; 1104 /* now setup color (writing 2 32bit words) */ 1105 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); 1106 si->engine.dma.cmdbuffer[si->engine.dma.current++] = color; /* Color1A */ 1107 1108 return B_OK; 1109 } 1110 1111 status_t nv_acc_rectangle_dma(uint32 xs,uint32 xe,uint32 ys,uint32 yl) 1112 { 1113 /* instruct engine what to fill: 1114 * wait for room in fifo for bitmap cmd if needed. */ 1115 if (nv_acc_fifofree_dma(3) != B_OK) return B_ERROR; 1116 /* now setup fill (writing 3 32bit words) */ 1117 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, 2); 1118 si->engine.dma.cmdbuffer[si->engine.dma.current++] = 1119 ((xs << 16) | (ys & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */ 1120 si->engine.dma.cmdbuffer[si->engine.dma.current++] = 1121 (((xe - xs) << 16) | (yl & 0x0000ffff)); /* Unclipped Rect 0 WidthHeight */ 1122 1123 /* tell the engine to fetch the commands in the DMA buffer that where not 1124 * executed before. At this time the setup done by nv_acc_setup_rectangle_dma() is 1125 * also executed on the first call of nv_acc_rectangle_dma(). */ 1126 nv_start_dma(); 1127 1128 return B_OK; 1129 } 1130 1131 /* rectangle invert - i.e. text cursor and text selection */ 1132 status_t nv_acc_setup_rect_invert_dma() 1133 { 1134 /* setup solid pattern: 1135 * wait for room in fifo for pattern cmd if needed. */ 1136 if (nv_acc_fifofree_dma(7) != B_OK) return B_ERROR; 1137 /* now setup pattern (writing 7 32bit words) */ 1138 nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETSHAPE, 1); 1139 si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0x00000000; /* SetShape: 0 = 8x8, 1 = 64x1, 2 = 1x64 */ 1140 nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETCOLOR0, 4); 1141 si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetColor0 */ 1142 si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetColor1 */ 1143 si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetPattern[0] */ 1144 si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetPattern[1] */ 1145 1146 /* ROP registers (Raster OPeration): 1147 * wait for room in fifo for ROP cmd if needed. */ 1148 if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR; 1149 /* now setup ROP (writing 2 32bit words) for GXinvert */ 1150 nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1); 1151 si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0x55; /* SetRop5 */ 1152 1153 /* reset fill color: 1154 * wait for room in fifo for bitmap cmd if needed. */ 1155 if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR; 1156 /* now reset color (writing 2 32bit words) */ 1157 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1); 1158 si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0x00000000; /* Color1A */ 1159 1160 return B_OK; 1161 } 1162 1163 status_t nv_acc_rectangle_invert_dma(uint32 xs,uint32 xe,uint32 ys,uint32 yl) 1164 { 1165 /* instruct engine what to fill: 1166 * wait for room in fifo for bitmap cmd if needed. */ 1167 if (nv_acc_fifofree_dma(3) != B_OK) return B_ERROR; 1168 /* now setup fill (writing 3 32bit words) */ 1169 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, 2); 1170 si->engine.dma.cmdbuffer[si->engine.dma.current++] = 1171 ((xs << 16) | (ys & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */ 1172 si->engine.dma.cmdbuffer[si->engine.dma.current++] = 1173 (((xe - xs) << 16) | (yl & 0x0000ffff)); /* Unclipped Rect 0 WidthHeight */ 1174 1175 /* tell the engine to fetch the commands in the DMA buffer that where not 1176 * executed before. At this time the setup done by nv_acc_setup_rectangle_dma() is 1177 * also executed on the first call of nv_acc_rectangle_dma(). */ 1178 nv_start_dma(); 1179 1180 return B_OK; 1181 } 1182