1 /* NV Acceleration functions */
2
3 /* Author:
4 Rudolf Cornelissen 8/2003-6/2010.
5
6 This code was possible thanks to:
7 - the Linux XFree86 NV driver,
8 - the Linux UtahGLX 3D driver.
9 */
10
11 #define MODULE_BIT 0x00080000
12
13 #include "nv_std.h"
14
15 /*acceleration notes*/
16
17 /*functions Be's app_server uses:
18 fill span (horizontal only)
19 fill rectangle (these 2 are very similar)
20 invert rectangle
21 blit
22 */
23
24 static void nv_init_for_3D_dma(void);
25 static void nv_start_dma(void);
26 static status_t nv_acc_fifofree_dma(uint16 cmd_size);
27 static void nv_acc_cmd_dma(uint32 cmd, uint16 offset, uint16 size);
28 static void nv_acc_set_ch_dma(uint16 ch, uint32 handle);
29
30 /* used to track engine DMA stalls */
31 static uint8 err;
32
33 /* wait until engine completely idle */
nv_acc_wait_idle_dma()34 status_t nv_acc_wait_idle_dma()
35 {
36 /* we'd better check for timeouts on the DMA engine as it's theoretically
37 * breakable by malfunctioning software */
38 uint16 cnt = 0;
39
40 /* wait until all upcoming commands are in execution at least. Do this until
41 * we hit a timeout; abort if we failed at least three times before:
42 * if DMA stalls, we have to forget about it alltogether at some point, or
43 * the system will almost come to a complete halt.. */
44 /* note:
45 * it doesn't matter which FIFO channel's DMA registers we access, they are in
46 * fact all the same set. It also doesn't matter if the channel was assigned a
47 * command or not. */
48 while ((NV_REG32(NVACC_FIFO + NV_GENERAL_DMAGET) != (si->engine.dma.put << 2)) &&
49 (cnt < 10000) && (err < 3))
50 {
51 /* snooze a bit so I do not hammer the bus */
52 snooze (100);
53 cnt++;
54 }
55
56 /* log timeout if we had one */
57 if (cnt == 10000)
58 {
59 if (err < 3) err++;
60 LOG(4,("ACC_DMA: wait_idle; DMA timeout #%d, engine trouble!\n", err));
61 }
62
63 /* wait until execution completed */
64 while (ACCR(STATUS))
65 {
66 /* snooze a bit so I do not hammer the bus */
67 snooze (100);
68 }
69
70 return B_OK;
71 }
72
/* Initialisation required by the acceleration engine.
 * AFAIK this must be done again for every new screenmode. */
nv_acc_init_dma()75 status_t nv_acc_init_dma()
76 {
77 uint32 cnt, tmp;
78 uint32 surf_depth, cmd_depth;
79 /* reset the engine DMA stalls counter */
80 err = 0;
81
82 /* a hanging engine only recovers from a complete power-down/power-up cycle */
83 NV_REG32(NV32_PWRUPCTRL) = 0xffff00ff;
84 snooze(1000);
85 NV_REG32(NV32_PWRUPCTRL) = 0xffffffff;
86
87 /* don't try this on NV20 and later.. */
88 /* note:
89 * the specific register that's responsible for the speedfix on NV18 is
90 * $00400ed8: bit 6 needs to be zero for fastest rendering (confirmed). */
91 /* note also:
92 * on NV28 the following ranges could be reset (confirmed):
93 * $00400000 upto/incl. $004002fc;
94 * $00400400 upto/incl. $004017fc;
95 * $0040180c upto/incl. $00401948;
96 * $00401994 upto/incl. $00401a80;
97 * $00401a94 upto/incl. $00401ffc.
98 * The intermediate ranges hang the engine upon resetting. */
99 if (si->ps.card_arch < NV20A)
100 {
101 /* actively reset the PGRAPH registerset (acceleration engine) */
102 for (cnt = 0x00400000; cnt < 0x00402000; cnt +=4)
103 {
104 NV_REG32(cnt) = 0x00000000;
105 }
106 }
107
108 /* setup PTIMER: */
109 LOG(4,("ACC_DMA: timer numerator $%08x, denominator $%08x\n", ACCR(PT_NUMERATOR), ACCR(PT_DENOMINATR)));
110
111 /* The NV28 BIOS programs PTIMER like this (see coldstarting in nv_info.c) */
112 //ACCW(PT_NUMERATOR, (si->ps.std_engine_clock * 20));
113 //ACCW(PT_DENOMINATR, 0x00000271);
114 /* Nouveau (march 2009) mentions something like: writing 8 and 3 to these regs breaks the timings
115 * on the LVDS hardware sequencing microcode. A correct solution involves calculations with the GPU PLL. */
116
117 /* For now use BIOS pre-programmed values if there */
118 if (!ACCR(PT_NUMERATOR) || !ACCR(PT_DENOMINATR)) {
119 /* set timer numerator to 8 (in b0-15) */
120 ACCW(PT_NUMERATOR, 0x00000008);
121 /* set timer denominator to 3 (in b0-15) */
122 ACCW(PT_DENOMINATR, 0x00000003);
123 }
124
125 /* disable timer-alarm INT requests (b0) */
126 ACCW(PT_INTEN, 0x00000000);
127 /* reset timer-alarm INT status bit (b0) */
128 ACCW(PT_INTSTAT, 0xffffffff);
129
130 /* enable PRAMIN write access on pre NV10 before programming it! */
131 if (si->ps.card_arch == NV04A)
132 {
133 /* set framebuffer config: type = notiling, PRAMIN write access enabled */
134 NV_REG32(NV32_PFB_CONFIG_0) = 0x00001114;
135 }
136 else
137 {
138 /* setup acc engine 'source' tile adressranges */
139 if ((si->ps.card_type <= NV40) || (si->ps.card_type == NV45))
140 {
141 ACCW(NV10_FBTIL0AD, 0);
142 ACCW(NV10_FBTIL1AD, 0);
143 ACCW(NV10_FBTIL2AD, 0);
144 ACCW(NV10_FBTIL3AD, 0);
145 ACCW(NV10_FBTIL4AD, 0);
146 ACCW(NV10_FBTIL5AD, 0);
147 ACCW(NV10_FBTIL6AD, 0);
148 ACCW(NV10_FBTIL7AD, 0);
149 ACCW(NV10_FBTIL0ED, (si->ps.memory_size - 1));
150 ACCW(NV10_FBTIL1ED, (si->ps.memory_size - 1));
151 ACCW(NV10_FBTIL2ED, (si->ps.memory_size - 1));
152 ACCW(NV10_FBTIL3ED, (si->ps.memory_size - 1));
153 ACCW(NV10_FBTIL4ED, (si->ps.memory_size - 1));
154 ACCW(NV10_FBTIL5ED, (si->ps.memory_size - 1));
155 ACCW(NV10_FBTIL6ED, (si->ps.memory_size - 1));
156 ACCW(NV10_FBTIL7ED, (si->ps.memory_size - 1));
157 }
158 else
159 {
160 /* NV41, 43, 44, G70 and up */
161 ACCW(NV41_FBTIL0AD, 0);
162 ACCW(NV41_FBTIL1AD, 0);
163 ACCW(NV41_FBTIL2AD, 0);
164 ACCW(NV41_FBTIL3AD, 0);
165 ACCW(NV41_FBTIL4AD, 0);
166 ACCW(NV41_FBTIL5AD, 0);
167 ACCW(NV41_FBTIL6AD, 0);
168 ACCW(NV41_FBTIL7AD, 0);
169 ACCW(NV41_FBTIL8AD, 0);
170 ACCW(NV41_FBTIL9AD, 0);
171 ACCW(NV41_FBTILAAD, 0);
172 ACCW(NV41_FBTILBAD, 0);
173 ACCW(NV41_FBTIL0ED, (si->ps.memory_size - 1));
174 ACCW(NV41_FBTIL1ED, (si->ps.memory_size - 1));
175 ACCW(NV41_FBTIL2ED, (si->ps.memory_size - 1));
176 ACCW(NV41_FBTIL3ED, (si->ps.memory_size - 1));
177 ACCW(NV41_FBTIL4ED, (si->ps.memory_size - 1));
178 ACCW(NV41_FBTIL5ED, (si->ps.memory_size - 1));
179 ACCW(NV41_FBTIL6ED, (si->ps.memory_size - 1));
180 ACCW(NV41_FBTIL7ED, (si->ps.memory_size - 1));
181 ACCW(NV41_FBTIL8ED, (si->ps.memory_size - 1));
182 ACCW(NV41_FBTIL9ED, (si->ps.memory_size - 1));
183 ACCW(NV41_FBTILAED, (si->ps.memory_size - 1));
184 ACCW(NV41_FBTILBED, (si->ps.memory_size - 1));
185
186 if (si->ps.card_type >= G70)
187 {
188 ACCW(G70_FBTILCAD, 0);
189 ACCW(G70_FBTILDAD, 0);
190 ACCW(G70_FBTILEAD, 0);
191 ACCW(G70_FBTILCED, (si->ps.memory_size - 1));
192 ACCW(G70_FBTILDED, (si->ps.memory_size - 1));
193 ACCW(G70_FBTILEED, (si->ps.memory_size - 1));
194 }
195 }
196 }
197
198 /*** PRAMIN ***/
199 /* first clear the entire RAMHT (hash-table) space to a defined state. It turns
200 * out at least NV11 will keep the previously programmed handles over resets and
201 * power-outages upto about 15 seconds!! Faulty entries might well hang the
202 * engine (confirmed on NV11).
203 * Note:
204 * this behaviour is not very strange: even very old DRAM chips are known to be
205 * able to do this, even though you should refresh them every few milliseconds or
206 * so. (Large memory cell capacitors, though different cells vary a lot in their
207 * capacity.)
208 * Of course data validity is not certain by a long shot over this large
209 * amount of time.. */
210 for(cnt = 0; cnt < 0x0400; cnt++)
211 NV_REG32(NVACC_HT_HANDL_00 + (cnt << 2)) = 0;
212 /* RAMHT (hash-table) space SETUP FIFO HANDLES */
213 /* note:
214 * 'instance' tells you where the engine command is stored in 'PR_CTXx_x' sets
215 * below: instance being b4-19 with baseadress NV_PRAMIN_CTX_0 (0x00700000).
216 * That command is linked to the handle noted here. This handle is then used to
217 * tell the FIFO to which engine command it is connected!
218 * (CTX registers are actually a sort of RAM space.) */
219 if (si->ps.card_arch >= NV40A)
220 {
221 /* (first set) */
222 ACCW(HT_HANDL_00, (0x80000000 | NV10_CONTEXT_SURFACES_2D)); /* 32bit handle (not used) */
223 ACCW(HT_VALUE_00, 0x0010114c); /* instance $114c, engine = acc engine, CHID = $00 */
224
225 ACCW(HT_HANDL_01, (0x80000000 | NV_IMAGE_BLIT)); /* 32bit handle */
226 ACCW(HT_VALUE_01, 0x00101148); /* instance $1148, engine = acc engine, CHID = $00 */
227
228 ACCW(HT_HANDL_02, (0x80000000 | NV4_GDI_RECTANGLE_TEXT)); /* 32bit handle */
229 ACCW(HT_VALUE_02, 0x0010114a); /* instance $114a, engine = acc engine, CHID = $00 */
230
231 /* (second set) */
232 ACCW(HT_HANDL_10, (0x80000000 | NV_ROP5_SOLID)); /* 32bit handle */
233 ACCW(HT_VALUE_10, 0x00101142); /* instance $1142, engine = acc engine, CHID = $00 */
234
235 ACCW(HT_HANDL_11, (0x80000000 | NV_IMAGE_BLACK_RECTANGLE)); /* 32bit handle */
236 ACCW(HT_VALUE_11, 0x00101144); /* instance $1144, engine = acc engine, CHID = $00 */
237
238 ACCW(HT_HANDL_12, (0x80000000 | NV_IMAGE_PATTERN)); /* 32bit handle */
239 ACCW(HT_VALUE_12, 0x00101146); /* instance $1146, engine = acc engine, CHID = $00 */
240
241 ACCW(HT_HANDL_13, (0x80000000 | NV_SCALED_IMAGE_FROM_MEMORY)); /* 32bit handle */
242 ACCW(HT_VALUE_13, 0x0010114e); /* instance $114e, engine = acc engine, CHID = $00 */
243 }
244 else
245 {
246 /* (first set) */
247 ACCW(HT_HANDL_00, (0x80000000 | NV4_SURFACE)); /* 32bit handle */
248 ACCW(HT_VALUE_00, 0x80011145); /* instance $1145, engine = acc engine, CHID = $00 */
249
250 ACCW(HT_HANDL_01, (0x80000000 | NV_IMAGE_BLIT)); /* 32bit handle */
251 ACCW(HT_VALUE_01, 0x80011146); /* instance $1146, engine = acc engine, CHID = $00 */
252
253 ACCW(HT_HANDL_02, (0x80000000 | NV4_GDI_RECTANGLE_TEXT)); /* 32bit handle */
254 ACCW(HT_VALUE_02, 0x80011147); /* instance $1147, engine = acc engine, CHID = $00 */
255
256 ACCW(HT_HANDL_03, (0x80000000 | NV4_CONTEXT_SURFACES_ARGB_ZS)); /* 32bit handle (3D) */
257 ACCW(HT_VALUE_03, 0x80011148); /* instance $1148, engine = acc engine, CHID = $00 */
258
259 /* NV4_ and NV10_DX5_TEXTURE_TRIANGLE should be identical */
260 ACCW(HT_HANDL_04, (0x80000000 | NV4_DX5_TEXTURE_TRIANGLE)); /* 32bit handle (3D) */
261 ACCW(HT_VALUE_04, 0x80011149); /* instance $1149, engine = acc engine, CHID = $00 */
262
263 /* NV4_ and NV10_DX6_MULTI_TEXTURE_TRIANGLE should be identical */
264 ACCW(HT_HANDL_05, (0x80000000 | NV4_DX6_MULTI_TEXTURE_TRIANGLE)); /* 32bit handle (not used) */
265 ACCW(HT_VALUE_05, 0x8001114a); /* instance $114a, engine = acc engine, CHID = $00 */
266
267 ACCW(HT_HANDL_06, (0x80000000 | NV1_RENDER_SOLID_LIN)); /* 32bit handle (not used) */
268 ACCW(HT_VALUE_06, 0x8001114c); /* instance $114c, engine = acc engine, CHID = $00 */
269
270 /* (second set) */
271 ACCW(HT_HANDL_10, (0x80000000 | NV_ROP5_SOLID)); /* 32bit handle */
272 ACCW(HT_VALUE_10, 0x80011142); /* instance $1142, engine = acc engine, CHID = $00 */
273
274 ACCW(HT_HANDL_11, (0x80000000 | NV_IMAGE_BLACK_RECTANGLE)); /* 32bit handle */
275 ACCW(HT_VALUE_11, 0x80011143); /* instance $1143, engine = acc engine, CHID = $00 */
276
277 ACCW(HT_HANDL_12, (0x80000000 | NV_IMAGE_PATTERN)); /* 32bit handle */
278 ACCW(HT_VALUE_12, 0x80011144); /* instance $1144, engine = acc engine, CHID = $00 */
279
280 ACCW(HT_HANDL_13, (0x80000000 | NV_SCALED_IMAGE_FROM_MEMORY)); /* 32bit handle */
281 ACCW(HT_VALUE_13, 0x8001114b); /* instance $114b, engine = acc engine, CHID = $00 */
282
283 //2007 3D tests..
284 if (si->ps.card_type == NV15)
285 {
286 ACCW(HT_HANDL_14, (0x80000000 | NV_TCL_PRIMITIVE_3D)); /* 32bit handle */
287 ACCW(HT_VALUE_14, 0x8001114d); /* instance $114d, engine = acc engine, CHID = $00 */
288 }
289
290 }
291
292 /* program CTX registers: CTX1 is mostly done later (colorspace dependant) */
293 /* note:
294 * CTX determines which HT handles point to what engine commands. */
295 /* note also:
296 * CTX registers are in fact in the same GPU internal RAM space as the engine's
297 * hashtable. This means that stuff programmed in here also survives resets and
298 * power-outages! (confirmed NV11) */
299 if (si->ps.card_arch >= NV40A)
300 {
301 /* setup a DMA define for use by command defines below. */
302 ACCW(PR_CTX0_R, 0x00003000); /* DMA page table present and of linear type;
303 * DMA target node is NVM (non-volatile memory?)
304 * (instead of doing PCI or AGP transfers) */
305 ACCW(PR_CTX1_R, (si->ps.memory_size - 1)); /* DMA limit: size is all cardRAM */
306 ACCW(PR_CTX2_R, ((0x00000000 & 0xfffff000) | 0x00000002));
307 /* DMA access type is READ_AND_WRITE;
308 * memory starts at start of cardRAM (b12-31):
309 * It's adress needs to be at a 4kb boundary! */
310 ACCW(PR_CTX3_R, 0x00000002); /* unknown (looks like this is rubbish/not needed?) */
311 /* setup set '0' for cmd NV_ROP5_SOLID */
312 ACCW(PR_CTX0_0, 0x02080043); /* NVclass $043, patchcfg ROP_AND, nv10+: little endian */
313 ACCW(PR_CTX1_0, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */
314 ACCW(PR_CTX2_0, 0x00000000); /* DMA0 and DMA1 instance invalid */
315 ACCW(PR_CTX3_0, 0x00000000); /* method traps disabled */
316 ACCW(PR_CTX0_1, 0x00000000); /* extra */
317 ACCW(PR_CTX1_1, 0x00000000); /* extra */
318 /* setup set '1' for cmd NV_IMAGE_BLACK_RECTANGLE */
319 ACCW(PR_CTX0_2, 0x02080019); /* NVclass $019, patchcfg ROP_AND, nv10+: little endian */
320 ACCW(PR_CTX1_2, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */
321 ACCW(PR_CTX2_2, 0x00000000); /* DMA0 and DMA1 instance invalid */
322 ACCW(PR_CTX3_2, 0x00000000); /* method traps disabled */
323 ACCW(PR_CTX0_3, 0x00000000); /* extra */
324 ACCW(PR_CTX1_3, 0x00000000); /* extra */
325 /* setup set '2' for cmd NV_IMAGE_PATTERN */
326 ACCW(PR_CTX0_4, 0x02080018); /* NVclass $018, patchcfg ROP_AND, nv10+: little endian */
327 ACCW(PR_CTX1_4, 0x02000000); /* colorspace not set, notify instance is $0200 (b16-31) */
328 ACCW(PR_CTX2_4, 0x00000000); /* DMA0 and DMA1 instance invalid */
329 ACCW(PR_CTX3_4, 0x00000000); /* method traps disabled */
330 ACCW(PR_CTX0_5, 0x00000000); /* extra */
331 ACCW(PR_CTX1_5, 0x00000000); /* extra */
332 /* setup set '4' for cmd NV12_IMAGE_BLIT */
333 ACCW(PR_CTX0_6, 0x0208009f); /* NVclass $09f, patchcfg ROP_AND, nv10+: little endian */
334 ACCW(PR_CTX1_6, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */
335 ACCW(PR_CTX2_6, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */
336 ACCW(PR_CTX3_6, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */
337 ACCW(PR_CTX0_7, 0x00000000); /* extra */
338 ACCW(PR_CTX1_7, 0x00000000); /* extra */
339 /* setup set '5' for cmd NV4_GDI_RECTANGLE_TEXT */
340 ACCW(PR_CTX0_8, 0x0208004a); /* NVclass $04a, patchcfg ROP_AND, nv10+: little endian */
341 ACCW(PR_CTX1_8, 0x02000000); /* colorspace not set, notify instance is $0200 (b16-31) */
342 ACCW(PR_CTX2_8, 0x00000000); /* DMA0 and DMA1 instance invalid */
343 ACCW(PR_CTX3_8, 0x00000000); /* method traps disabled */
344 ACCW(PR_CTX0_9, 0x00000000); /* extra */
345 ACCW(PR_CTX1_9, 0x00000000); /* extra */
346 /* setup set '6' for cmd NV10_CONTEXT_SURFACES_2D */
347 ACCW(PR_CTX0_A, 0x02080062); /* NVclass $062, nv10+: little endian */
348 ACCW(PR_CTX1_A, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */
349 ACCW(PR_CTX2_A, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */
350 ACCW(PR_CTX3_A, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */
351 ACCW(PR_CTX0_B, 0x00000000); /* extra */
352 ACCW(PR_CTX1_B, 0x00000000); /* extra */
353 /* setup set '7' for cmd NV_SCALED_IMAGE_FROM_MEMORY */
354 ACCW(PR_CTX0_C, 0x02080077); /* NVclass $077, nv10+: little endian */
355 ACCW(PR_CTX1_C, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */
356 ACCW(PR_CTX2_C, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */
357 ACCW(PR_CTX3_C, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */
358 ACCW(PR_CTX0_D, 0x00000000); /* extra */
359 ACCW(PR_CTX1_D, 0x00000000); /* extra */
360 /* setup DMA set pointed at by PF_CACH1_DMAI */
361 ACCW(PR_CTX0_E, 0x00003002); /* DMA page table present and of linear type;
362 * DMA class is $002 (b0-11);
363 * DMA target node is NVM (non-volatile memory?)
364 * (instead of doing PCI or AGP transfers) */
365 ACCW(PR_CTX1_E, 0x00007fff); /* DMA limit: tablesize is 32k bytes */
366 ACCW(PR_CTX2_E, (((si->ps.memory_size - 1) & 0xffff8000) | 0x00000002));
367 /* DMA access type is READ_AND_WRITE;
368 * table is located at end of cardRAM (b12-31):
369 * It's adress needs to be at a 4kb boundary! */
370 }
371 else
372 {
373 /* setup a DMA define for use by command defines below. */
374 ACCW(PR_CTX0_R, 0x00003000); /* DMA page table present and of linear type;
375 * DMA target node is NVM (non-volatile memory?)
376 * (instead of doing PCI or AGP transfers) */
377 ACCW(PR_CTX1_R, (si->ps.memory_size - 1)); /* DMA limit: size is all cardRAM */
378 ACCW(PR_CTX2_R, ((0x00000000 & 0xfffff000) | 0x00000002));
379 /* DMA access type is READ_AND_WRITE;
380 * memory starts at start of cardRAM (b12-31):
381 * It's adress needs to be at a 4kb boundary! */
382 ACCW(PR_CTX3_R, 0x00000002); /* unknown (looks like this is rubbish/not needed?) */
383 /* setup set '0' for cmd NV_ROP5_SOLID */
384 ACCW(PR_CTX0_0, 0x01008043); /* NVclass $043, patchcfg ROP_AND, nv10+: little endian */
385 ACCW(PR_CTX1_0, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */
386 ACCW(PR_CTX2_0, 0x00000000); /* DMA0 and DMA1 instance invalid */
387 ACCW(PR_CTX3_0, 0x00000000); /* method traps disabled */
388 /* setup set '1' for cmd NV_IMAGE_BLACK_RECTANGLE */
389 ACCW(PR_CTX0_1, 0x01008019); /* NVclass $019, patchcfg ROP_AND, nv10+: little endian */
390 ACCW(PR_CTX1_1, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */
391 ACCW(PR_CTX2_1, 0x00000000); /* DMA0 and DMA1 instance invalid */
392 ACCW(PR_CTX3_1, 0x00000000); /* method traps disabled */
393 /* setup set '2' for cmd NV_IMAGE_PATTERN */
394 ACCW(PR_CTX0_2, 0x01008018); /* NVclass $018, patchcfg ROP_AND, nv10+: little endian */
395 ACCW(PR_CTX1_2, 0x00000002); /* colorspace not set, notify instance is $0200 (b16-31) */
396 ACCW(PR_CTX2_2, 0x00000000); /* DMA0 and DMA1 instance invalid */
397 ACCW(PR_CTX3_2, 0x00000000); /* method traps disabled */
398 /* setup set '3' for ... */
399 if(si->ps.card_arch >= NV10A)
400 {
401 /* ... cmd NV10_CONTEXT_SURFACES_2D */
402 ACCW(PR_CTX0_3, 0x01008062); /* NVclass $062, nv10+: little endian */
403 }
404 else
405 {
406 /* ... cmd NV4_SURFACE */
407 ACCW(PR_CTX0_3, 0x01008042); /* NVclass $042, nv10+: little endian */
408 }
409 ACCW(PR_CTX1_3, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */
410 ACCW(PR_CTX2_3, 0x11401140); /* DMA0 instance is $1140, DMA1 instance invalid */
411 ACCW(PR_CTX3_3, 0x00000000); /* method trap 0 is $1140, trap 1 disabled */
412 /* setup set '4' for ... */
413 if (si->ps.card_type >= NV11)
414 {
415 /* ... cmd NV12_IMAGE_BLIT */
416 ACCW(PR_CTX0_4, 0x0100809f); /* NVclass $09f, patchcfg ROP_AND, nv10+: little endian */
417 }
418 else
419 {
420 /* ... cmd NV_IMAGE_BLIT */
421 ACCW(PR_CTX0_4, 0x0100805f); /* NVclass $05f, patchcfg ROP_AND, nv10+: little endian */
422 }
423 ACCW(PR_CTX1_4, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */
424 ACCW(PR_CTX2_4, 0x11401140); /* DMA0 instance is $1140, DMA1 instance invalid */
425 ACCW(PR_CTX3_4, 0x00000000); /* method trap 0 is $1140, trap 1 disabled */
426 /* setup set '5' for cmd NV4_GDI_RECTANGLE_TEXT */
427 ACCW(PR_CTX0_5, 0x0100804a); /* NVclass $04a, patchcfg ROP_AND, nv10+: little endian */
428 ACCW(PR_CTX1_5, 0x00000002); /* colorspace not set, notify instance is $0200 (b16-31) */
429 ACCW(PR_CTX2_5, 0x00000000); /* DMA0 and DMA1 instance invalid */
430 ACCW(PR_CTX3_5, 0x00000000); /* method traps disabled */
431 /* setup set '6' ... */
432 if (si->ps.card_arch >= NV10A)
433 {
434 /* ... for cmd NV10_CONTEXT_SURFACES_ARGB_ZS */
435 ACCW(PR_CTX0_6, 0x00000093); /* NVclass $093, nv10+: little endian */
436 }
437 else
438 {
439 /* ... for cmd NV4_CONTEXT_SURFACES_ARGB_ZS */
440 ACCW(PR_CTX0_6, 0x00000053); /* NVclass $053, nv10+: little endian */
441 }
442 ACCW(PR_CTX1_6, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */
443 ACCW(PR_CTX2_6, 0x11401140); /* DMA0, DMA1 instance = $1140 */
444 ACCW(PR_CTX3_6, 0x00000000); /* method traps disabled */
445 /* setup set '7' ... */
446 if (si->ps.card_arch >= NV10A)
447 {
448 /* ... for cmd NV10_DX5_TEXTURE_TRIANGLE */
449 ACCW(PR_CTX0_7, 0x0300a094); /* NVclass $094, patchcfg ROP_AND, userclip enable,
450 * context surface0 valid, nv10+: little endian */
451 }
452 else
453 {
454 /* ... for cmd NV4_DX5_TEXTURE_TRIANGLE */
455 ACCW(PR_CTX0_7, 0x0300a054); /* NVclass $054, patchcfg ROP_AND, userclip enable,
456 * context surface0 valid */
457 }
458 ACCW(PR_CTX1_7, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */
459 ACCW(PR_CTX2_7, 0x11401140); /* DMA0, DMA1 instance = $1140 */
460 ACCW(PR_CTX3_7, 0x00000000); /* method traps disabled */
461 /* setup set '8' ... */
462 if (si->ps.card_arch >= NV10A)
463 {
464 /* ... for cmd NV10_DX6_MULTI_TEXTURE_TRIANGLE (not used) */
465 ACCW(PR_CTX0_8, 0x0300a095); /* NVclass $095, patchcfg ROP_AND, userclip enable,
466 * context surface0 valid, nv10+: little endian */
467 }
468 else
469 {
470 /* ... for cmd NV4_DX6_MULTI_TEXTURE_TRIANGLE (not used) */
471 ACCW(PR_CTX0_8, 0x0300a055); /* NVclass $055, patchcfg ROP_AND, userclip enable,
472 * context surface0 valid */
473 }
474 ACCW(PR_CTX1_8, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */
475 ACCW(PR_CTX2_8, 0x11401140); /* DMA0, DMA1 instance = $1140 */
476 ACCW(PR_CTX3_8, 0x00000000); /* method traps disabled */
477 /* setup set '9' for cmd NV_SCALED_IMAGE_FROM_MEMORY */
478 ACCW(PR_CTX0_9, 0x01018077); /* NVclass $077, patchcfg SRC_COPY,
479 * context surface0 valid, nv10+: little endian */
480 ACCW(PR_CTX1_9, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */
481 ACCW(PR_CTX2_9, 0x11401140); /* DMA0, DMA1 instance = $1140 */
482 ACCW(PR_CTX3_9, 0x00000000); /* method traps disabled */
483 /* setup set 'A' for cmd NV1_RENDER_SOLID_LIN (not used) */
484 ACCW(PR_CTX0_A, 0x0300a01c); /* NVclass $01c, patchcfg ROP_AND, userclip enable,
485 * context surface0 valid, nv10+: little endian */
486 ACCW(PR_CTX1_A, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */
487 ACCW(PR_CTX2_A, 0x11401140); /* DMA0, DMA1 instance = $1140 */
488 ACCW(PR_CTX3_A, 0x00000000); /* method traps disabled */
489 //2007 3D tests..
490 /* setup set 'B' ... */
491 if (si->ps.card_type == NV15)
492 {
493 /* ... for cmd NV11_TCL_PRIMITIVE_3D */
494 ACCW(PR_CTX0_B, 0x0300a096); /* NVclass $096, patchcfg ROP_AND, userclip enable,
495 * context surface0 valid, nv10+: little endian */
496 ACCW(PR_CTX1_B, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */
497 ACCW(PR_CTX2_B, 0x11401140); /* DMA0, DMA1 instance = $1140 */
498 ACCW(PR_CTX3_B, 0x00000000); /* method traps disabled */
499 }
500 /* setup DMA set pointed at by PF_CACH1_DMAI */
501 if (si->engine.agp_mode)
502 {
503 /* DMA page table present and of linear type;
504 * DMA class is $002 (b0-11);
505 * DMA target node is AGP */
506 ACCW(PR_CTX0_C, 0x00033002);
507 }
508 else
509 {
510 /* DMA page table present and of linear type;
511 * DMA class is $002 (b0-11);
512 * DMA target node is PCI */
513 ACCW(PR_CTX0_C, 0x00023002);
514 }
515 ACCW(PR_CTX1_C, 0x000fffff); /* DMA limit: tablesize is 1M bytes */
516 ACCW(PR_CTX2_C, (((uintptr_t)((uint8 *)(si->dma_buffer_pci))) | 0x00000002));
517 /* DMA access type is READ_AND_WRITE;
518 * table is located in main system RAM (b12-31):
519 * It's adress needs to be at a 4kb boundary! */
520
521 /* set the 3D rendering functions colordepth via BPIXEL's 'depth 2' */
522 /* note:
523 * setting a depth to 'invalid' (zero) makes the engine report
524 * ready with drawing 'immediately'. */
525 //fixme: NV30A and above (probably) needs to be corrected...
526 switch(si->dm.space)
527 {
528 case B_CMAP8:
529 if (si->ps.card_arch < NV30A)
530 /* set depth 2: $1 = Y8 */
531 ACCW(BPIXEL, 0x00000100);
532 else
533 /* set depth 0-1: $1 = Y8, $2 = X1R5G5B5_Z1R5G5B5 */
534 ACCW(BPIXEL, 0x00000021);
535 break;
536 case B_RGB15_LITTLE:
537 if (si->ps.card_arch < NV30A)
538 /* set depth 2: $4 = A1R5G5B5 */
539 ACCW(BPIXEL, 0x00000400);
540 else
541 /* set depth 0-1: $2 = X1R5G5B5_Z1R5G5B5, $4 = A1R5G5B5 */
542 ACCW(BPIXEL, 0x00000042);
543 break;
544 case B_RGB16_LITTLE:
545 if (si->ps.card_arch < NV30A)
546 /* set depth 2: $5 = R5G6B5 */
547 ACCW(BPIXEL, 0x00000500);
548 else
549 /* set depth 0-1: $5 = R5G6B5, $a = X1A7R8G8B8_O1A7R8G8B8 */
550 ACCW(BPIXEL, 0x000000a5);
551 break;
552 case B_RGB32_LITTLE:
553 case B_RGBA32_LITTLE:
554 if (si->ps.card_arch < NV30A)
555 /* set depth 2: $c = A8R8G8B8 */
556 ACCW(BPIXEL, 0x00000c00);
557 else
558 /* set depth 0-1: $7 = X8R8G8B8_Z8R8G8B8, $e = V8YB8U8YA8 */
559 ACCW(BPIXEL, 0x000000e7);
560 break;
561 default:
562 LOG(8,("ACC: init, invalid bit depth\n"));
563 return B_ERROR;
564 }
565 }
566
567 if (si->ps.card_arch == NV04A)
568 {
569 /* do a explicit engine reset */
570 ACCW(DEBUG0, 0x000001ff);
571
572 /* init some function blocks */
573 /* DEBUG0, b20 and b21 should be high, this has a big influence on
574 * 3D rendering speed! (on all cards, confirmed) */
575 ACCW(DEBUG0, 0x1230c000);
576 /* DEBUG1, b19 = 1 increases 3D rendering speed on TNT2 (M64) a bit,
577 * TNT1 rendering speed stays the same (all cards confirmed) */
578 ACCW(DEBUG1, 0x72191101);
579 ACCW(DEBUG2, 0x11d5f071);
580 ACCW(DEBUG3, 0x0004ff31);
581 /* init OP methods */
582 ACCW(DEBUG3, 0x4004ff31);
583
584 /* disable all acceleration engine INT reguests */
585 ACCW(ACC_INTE, 0x00000000);
586 /* reset all acceration engine INT status bits */
587 ACCW(ACC_INTS, 0xffffffff);
588 /* context control enabled */
589 ACCW(NV04_CTX_CTRL, 0x10010100);
590 /* all acceleration buffers, pitches and colors are valid */
591 ACCW(NV04_ACC_STAT, 0xffffffff);
592 /* enable acceleration engine command FIFO */
593 ACCW(FIFO_EN, 0x00000001);
594
595 /* setup location of active screen in framebuffer */
596 ACCW(OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
597 ACCW(OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
598 /* setup accesible card memory range */
599 ACCW(BLIMIT0, (si->ps.memory_size - 1));
600 ACCW(BLIMIT1, (si->ps.memory_size - 1));
601
602 /* pattern shape value = 8x8, 2 color */
603 //fixme: not needed, unless the engine has a hardware fault (setting via cmd)!
604 //ACCW(PAT_SHP, 0x00000000);
605 /* Pgraph Beta AND value (fraction) b23-30 */
606 ACCW(BETA_AND_VAL, 0xffffffff);
607 }
608 else
609 {
610 /* do a explicit engine reset */
611 ACCW(DEBUG0, 0xffffffff);
612 ACCW(DEBUG0, 0x00000000);
613 /* disable all acceleration engine INT reguests */
614 ACCW(ACC_INTE, 0x00000000);
615 /* reset all acceration engine INT status bits */
616 ACCW(ACC_INTS, 0xffffffff);
617 /* context control enabled */
618 ACCW(NV10_CTX_CTRL, 0x10010100);
619 /* all acceleration buffers, pitches and colors are valid */
620 ACCW(NV10_ACC_STAT, 0xffffffff);
621 /* enable acceleration engine command FIFO */
622 ACCW(FIFO_EN, 0x00000001);
623 /* setup surface type:
624 * b1-0 = %01 = surface type is non-swizzle;
625 * this is needed to enable 3D on NV1x (confirmed) and maybe others? */
626 ACCW(NV10_SURF_TYP, ((ACCR(NV10_SURF_TYP)) & 0x0007ff00));
627 ACCW(NV10_SURF_TYP, ((ACCR(NV10_SURF_TYP)) | 0x00020101));
628 }
629
630 if (si->ps.card_arch == NV10A)
631 {
632 /* init some function blocks */
633 ACCW(DEBUG1, 0x00118700);
634 /* DEBUG2 has a big influence on 3D speed for NV11 and NV15
635 * (confirmed b3 and b18 should both be '1' on both cards!)
636 * (b16 should also be '1', increases 3D speed on NV11 a bit more) */
637 ACCW(DEBUG2, 0x24fd2ad9);
638 ACCW(DEBUG3, 0x55de0030);
639 /* NV10_DEBUG4 has a big influence on 3D speed for NV11, NV15 and NV18
640 * (confirmed b14 and b15 should both be '1' on these cards!)
641 * (confirmed b8 should be '0' on NV18 to prevent complete engine crash!) */
642 ACCW(NV10_DEBUG4, 0x0000c000);
643
644 /* copy tile setup stuff from 'source' to acc engine */
645 for (cnt = 0; cnt < 32; cnt++)
646 {
647 NV_REG32(NVACC_NV10_TIL0AD + (cnt << 2)) =
648 NV_REG32(NVACC_NV10_FBTIL0AD + (cnt << 2));
649 }
650
651 /* setup location of active screen in framebuffer */
652 ACCW(OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
653 ACCW(OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
654 /* setup accesible card memory range */
655 ACCW(BLIMIT0, (si->ps.memory_size - 1));
656 ACCW(BLIMIT1, (si->ps.memory_size - 1));
657
658 /* pattern shape value = 8x8, 2 color */
659 //fixme: not needed, unless the engine has a hardware fault (setting via cmd)!
660 //ACCW(PAT_SHP, 0x00000000);
661 /* Pgraph Beta AND value (fraction) b23-30 */
662 ACCW(BETA_AND_VAL, 0xffffffff);
663 }
664
665 if (si->ps.card_arch >= NV20A)
666 {
667 switch (si->ps.card_arch)
668 {
669 case NV40A:
670 /* init some function blocks */
671 ACCW(DEBUG1, 0x401287c0);
672 ACCW(DEBUG3, 0x60de8051);
673 /* disable specific functions, but enable SETUP_SPARE2 register */
674 ACCW(NV10_DEBUG4, 0x00008000);
675 /* set limit_viol_pix_adress(?): more likely something unknown.. */
676 ACCW(NV25_WHAT0, 0x00be3c5f);
677
678 /* setup some unknown serially accessed registers (?) */
679 tmp = (NV_REG32(NV32_NV4X_WHAT0) & 0x000000ff);
680 for (cnt = 0; (tmp && !(tmp & 0x00000001)); tmp >>= 1, cnt++)
681 {
682 ACCW(NV4X_WHAT2, cnt);
683 }
684
685 /* unknown.. */
686 switch (si->ps.card_type)
687 {
688 case NV40:
689 case NV45:
690 /* and NV48: but these are pgm'd as NV45 currently */
691 ACCW(NV40_WHAT0, 0x83280fff);
692 ACCW(NV40_WHAT1, 0x000000a0);
693 ACCW(NV40_WHAT2, 0x0078e366);
694 ACCW(NV40_WHAT3, 0x0000014c);
695 break;
696 case NV41:
697 /* and ID == 0x012x: but no cards defined yet */
698 ACCW(NV40P_WHAT0, 0x83280eff);
699 ACCW(NV40P_WHAT1, 0x000000a0);
700 ACCW(NV40P_WHAT2, 0x007596ff);
701 ACCW(NV40P_WHAT3, 0x00000108);
702 break;
703 case NV43:
704 ACCW(NV40P_WHAT0, 0x83280eff);
705 ACCW(NV40P_WHAT1, 0x000000a0);
706 ACCW(NV40P_WHAT2, 0x0072cb77);
707 ACCW(NV40P_WHAT3, 0x00000108);
708 break;
709 case NV44:
710 case G72:
711 ACCW(NV40P_WHAT0, 0x83280eff);
712 ACCW(NV40P_WHAT1, 0x000000a0);
713
714 NV_REG32(NV32_NV44_WHAT10) = NV_REG32(NV32_NV10STRAPINFO);
715 NV_REG32(NV32_NV44_WHAT11) = 0x00000000;
716 NV_REG32(NV32_NV44_WHAT12) = 0x00000000;
717 NV_REG32(NV32_NV44_WHAT13) = NV_REG32(NV32_NV10STRAPINFO);
718
719 ACCW(NV44_WHAT2, 0x00000000);
720 ACCW(NV44_WHAT3, 0x00000000);
721 break;
722 /* case NV44 type 2: (cardID 0x022x)
723 //fixme if needed: doesn't seem to need the strapinfo thing..
724 ACCW(NV40P_WHAT0, 0x83280eff);
725 ACCW(NV40P_WHAT1, 0x000000a0);
726
727 ACCW(NV44_WHAT2, 0x00000000);
728 ACCW(NV44_WHAT3, 0x00000000);
729 break;
730 */ case G70:
731 case G71:
732 case G73:
733 ACCW(NV40P_WHAT0, 0x83280eff);
734 ACCW(NV40P_WHAT1, 0x000000a0);
735 ACCW(NV40P_WHAT2, 0x07830610);
736 ACCW(NV40P_WHAT3, 0x0000016a);
737 break;
738 default:
739 ACCW(NV40P_WHAT0, 0x83280eff);
740 ACCW(NV40P_WHAT1, 0x000000a0);
741 break;
742 }
743
744 ACCW(NV10_TIL3PT, 0x2ffff800);
745 ACCW(NV10_TIL3ST, 0x00006000);
746 ACCW(NV4X_WHAT1, 0x01000000);
747 /* engine data source DMA instance = $1140 */
748 ACCW(NV4X_DMA_SRC, 0x00001140);
749 break;
750 case NV30A:
751 /* init some function blocks, but most is unknown.. */
752 ACCW(DEBUG1, 0x40108700);
753 ACCW(NV25_WHAT1, 0x00140000);
754 ACCW(DEBUG3, 0xf00e0431);
755 ACCW(NV10_DEBUG4, 0x00008000);
756 ACCW(NV25_WHAT0, 0xf04b1f36);
757 ACCW(NV20_WHAT3, 0x1002d888);
758 ACCW(NV25_WHAT2, 0x62ff007f);
759 break;
760 case NV20A:
761 /* init some function blocks, but most is unknown.. */
762 ACCW(DEBUG1, 0x00118700);
763 ACCW(DEBUG3, 0xf20e0431);
764 ACCW(NV10_DEBUG4, 0x00000000);
765 ACCW(NV20_WHAT1, 0x00000040);
766 if (si->ps.card_type < NV25)
767 {
768 ACCW(NV20_WHAT2, 0x00080000);
769 ACCW(NV10_DEBUG5, 0x00000005);
770 ACCW(NV20_WHAT3, 0x45caa208);
771 ACCW(NV20_WHAT4, 0x24000000);
772 ACCW(NV20_WHAT5, 0x00000040);
773
774 /* copy some fixed RAM(?) configuration info(?) to some indexed registers: */
775 /* b16-24 is select; b2-13 is adress in 32-bit words */
776 ACCW(RDI_INDEX, 0x00e00038);
777 /* data is 32-bit */
778 ACCW(RDI_DATA, 0x00000030);
779 /* copy some fixed RAM(?) configuration info(?) to some indexed registers: */
780 /* b16-24 is select; b2-13 is adress in 32-bit words */
781 ACCW(RDI_INDEX, 0x00e10038);
782 /* data is 32-bit */
783 ACCW(RDI_DATA, 0x00000030);
784 }
785 else
786 {
787 ACCW(NV25_WHAT1, 0x00080000);
788 ACCW(NV25_WHAT0, 0x304b1fb6);
789 ACCW(NV20_WHAT3, 0x18b82880);
790 ACCW(NV20_WHAT4, 0x44000000);
791 ACCW(NV20_WHAT5, 0x40000080);
792 ACCW(NV25_WHAT2, 0x000000ff);
793 }
794 break;
795 }
796
797 /* NV20A, NV30A and NV40A: */
798 /* copy tile setup stuff from previous setup 'source' to acc engine
799 * (pattern colorRAM?) */
800 if ((si->ps.card_type <= NV40) || (si->ps.card_type == NV45))
801 {
802 for (cnt = 0; cnt < 32; cnt++)
803 {
804 /* copy NV10_FBTIL0AD upto/including NV10_FBTIL7ST */
805 NV_REG32(NVACC_NV20_WHAT0 + (cnt << 2)) =
806 NV_REG32(NVACC_NV10_FBTIL0AD + (cnt << 2));
807
808 /* copy NV10_FBTIL0AD upto/including NV10_FBTIL7ST */
809 NV_REG32(NVACC_NV20_2_WHAT0 + (cnt << 2)) =
810 NV_REG32(NVACC_NV10_FBTIL0AD + (cnt << 2));
811 }
812 }
813 else
814 {
815 /* NV41, 43, 44, G70 and later */
816 if (si->ps.card_type >= G70)
817 {
818 for (cnt = 0; cnt < 60; cnt++)
819 {
820 /* copy NV41_FBTIL0AD upto/including G70_FBTILEST */
821 NV_REG32(NVACC_NV41_WHAT0 + (cnt << 2)) =
822 NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2));
823
824 /* copy NV41_FBTIL0AD upto/including G70_FBTILEST */
825 NV_REG32(NVACC_NV20_2_WHAT0 + (cnt << 2)) =
826 NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2));
827 }
828 }
829 else
830 {
831 /* NV41, 43, 44 */
832 for (cnt = 0; cnt < 48; cnt++)
833 {
834 /* copy NV41_FBTIL0AD upto/including NV41_FBTILBST */
835 NV_REG32(NVACC_NV20_WHAT0 + (cnt << 2)) =
836 NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2));
837
838 if (si->ps.card_type != NV44)
839 {
840 /* copy NV41_FBTIL0AD upto/including NV41_FBTILBST */
841 NV_REG32(NVACC_NV20_2_WHAT0 + (cnt << 2)) =
842 NV_REG32(NVACC_NV41_FBTIL0AD + (cnt << 2));
843 }
844 }
845 }
846 }
847
848 if (si->ps.card_arch >= NV40A)
849 {
850 if ((si->ps.card_type == NV40) || (si->ps.card_type == NV45))
851 {
852 /* copy some RAM configuration info(?) */
853 ACCW(NV20_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0));
854 ACCW(NV20_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1));
855 ACCW(NV40_WHAT_T2, NV_REG32(NV32_PFB_CONFIG_0));
856 ACCW(NV40_WHAT_T3, NV_REG32(NV32_PFB_CONFIG_1));
857
858 /* setup location of active screen in framebuffer */
859 ACCW(NV20_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
860 ACCW(NV20_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
861 /* setup accesible card memory range */
862 ACCW(NV20_BLIMIT6, (si->ps.memory_size - 1));
863 ACCW(NV20_BLIMIT7, (si->ps.memory_size - 1));
864 }
865 else
866 {
867 /* NV41, 43, 44, G70 and later */
868
869 /* copy some RAM configuration info(?) */
870 if (si->ps.card_type >= G70)
871 {
872 ACCW(G70_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0));
873 ACCW(G70_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1));
874 }
875 else
876 {
877 /* NV41, 43, 44 */
878 ACCW(NV40P_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0));
879 ACCW(NV40P_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1));
880 }
881 ACCW(NV40P_WHAT_T2, NV_REG32(NV32_PFB_CONFIG_0));
882 ACCW(NV40P_WHAT_T3, NV_REG32(NV32_PFB_CONFIG_1));
883
884 /* setup location of active screen in framebuffer */
885 ACCW(NV40P_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
886 ACCW(NV40P_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
887 /* setup accesible card memory range */
888 ACCW(NV40P_BLIMIT6, (si->ps.memory_size - 1));
889 ACCW(NV40P_BLIMIT7, (si->ps.memory_size - 1));
890 }
891 }
892 else /* NV20A and NV30A: */
893 {
894 /* copy some RAM configuration info(?) */
895 ACCW(NV20_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0));
896 ACCW(NV20_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1));
897 /* copy some RAM configuration info(?) to some indexed registers: */
898 /* b16-24 is select; b2-13 is adress in 32-bit words */
899 ACCW(RDI_INDEX, 0x00ea0000);
900 /* data is 32-bit */
901 ACCW(RDI_DATA, NV_REG32(NV32_PFB_CONFIG_0));
902 /* b16-24 is select; b2-13 is adress in 32-bit words */
903 ACCW(RDI_INDEX, 0x00ea0004);
904 /* data is 32-bit */
905 ACCW(RDI_DATA, NV_REG32(NV32_PFB_CONFIG_1));
906
907 /* setup location of active screen in framebuffer */
908 ACCW(NV20_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
909 ACCW(NV20_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
910 /* setup accesible card memory range */
911 ACCW(NV20_BLIMIT6, (si->ps.memory_size - 1));
912 ACCW(NV20_BLIMIT7, (si->ps.memory_size - 1));
913 }
914
915 /* NV20A, NV30A and NV40A: */
916 /* setup some acc engine tile stuff */
917 ACCW(NV10_TIL2AD, 0x00000000);
918 ACCW(NV10_TIL0ED, 0xffffffff);
919 }
920
921 /* all cards: */
922 /* setup clipping: rect size is 32768 x 32768, probably max. setting */
923 /* note:
924 * can also be done via the NV_IMAGE_BLACK_RECTANGLE engine command. */
925 ACCW(ABS_UCLP_XMIN, 0x00000000);
926 ACCW(ABS_UCLP_YMIN, 0x00000000);
927 ACCW(ABS_UCLP_XMAX, 0x00007fff);
928 ACCW(ABS_UCLP_YMAX, 0x00007fff);
929
930 /* setup sync parameters for NV12_IMAGE_BLIT command for the current mode:
931 * values given are CRTC vertical counter limit values. The NV12 command will wait
932 * for the specified's CRTC's vertical counter to be in between the given values */
933 if (si->ps.card_type >= NV11)
934 {
935 ACCW(NV11_CRTC_LO, si->dm.timing.v_display - 1);
936 ACCW(NV11_CRTC_HI, si->dm.timing.v_display + 1);
937 }
938
939 /*** PFIFO ***/
940 /* (setup caches) */
941 /* disable caches reassign */
942 ACCW(PF_CACHES, 0x00000000);
943 /* PFIFO mode: channel 0 is in DMA mode, channels 1 - 32 are in PIO mode */
944 ACCW(PF_MODE, 0x00000001);
945 /* cache1 push0 access disabled */
946 ACCW(PF_CACH1_PSH0, 0x00000000);
947 /* cache1 pull0 access disabled */
948 ACCW(PF_CACH1_PUL0, 0x00000000);
949 /* cache1 push1 mode = DMA */
950 if (si->ps.card_arch >= NV40A)
951 ACCW(PF_CACH1_PSH1, 0x00010000);
952 else
953 ACCW(PF_CACH1_PSH1, 0x00000100);
954 /* cache1 DMA Put offset = 0 (b2-28) */
955 ACCW(PF_CACH1_DMAP, 0x00000000);
956 /* cache1 DMA Get offset = 0 (b2-28) */
957 ACCW(PF_CACH1_DMAG, 0x00000000);
958 /* cache1 DMA instance adress = $114e (b0-15);
959 * instance being b4-19 with baseadress NV_PRAMIN_CTX_0 (0x00700000). */
960 /* note:
961 * should point to a DMA definition in CTX register space (which is sort of RAM).
962 * This define tells the engine where the DMA cmd buffer is and what it's size is.
963 * Inside that cmd buffer you'll find the actual issued engine commands. */
964 if (si->ps.card_arch >= NV40A)
965 ACCW(PF_CACH1_DMAI, 0x00001150);
966 else
967 //2007 3d test..
968 ACCW(PF_CACH1_DMAI, 0x0000114e);
969 /* cache0 push0 access disabled */
970 ACCW(PF_CACH0_PSH0, 0x00000000);
971 /* cache0 pull0 access disabled */
972 ACCW(PF_CACH0_PUL0, 0x00000000);
973 /* RAM HT (hash table) baseadress = $10000 (b4-8), size = 4k,
974 * search = 128 (is byte offset between hash 'sets') */
975 /* note:
976 * so HT base is $00710000, last is $00710fff.
977 * In this space you define the engine command handles (HT_HANDL_XX), which
978 * in turn points to the defines in CTX register space (which is sort of RAM) */
979 ACCW(PF_RAMHT, 0x03000100);
980 /* RAM FC baseadress = $11000 (b3-8) (size is fixed to 0.5k(?)) */
981 /* note:
982 * so FC base is $00711000, last is $007111ff. (not used?) */
983 ACCW(PF_RAMFC, 0x00000110);
984 /* RAM RO baseadress = $11200 (b1-8), size = 0.5k */
985 /* note:
986 * so RO base is $00711200, last is $007113ff. (not used?) */
987 /* note also:
988 * This means(?) the PRAMIN CTX registers are accessible from base $00711400. */
989 ACCW(PF_RAMRO, 0x00000112);
990 /* PFIFO size: ch0-15 = 512 bytes, ch16-31 = 124 bytes */
991 ACCW(PF_SIZE, 0x0000ffff);
992 /* cache1 hash instance = $ffff (b0-15) */
993 ACCW(PF_CACH1_HASH, 0x0000ffff);
994 /* disable all PFIFO INTs */
995 ACCW(PF_INTEN, 0x00000000);
996 /* reset all PFIFO INT status bits */
997 ACCW(PF_INTSTAT, 0xffffffff);
998 /* cache0 pull0 engine = acceleration engine (graphics) */
999 ACCW(PF_CACH0_PUL1, 0x00000001);
1000 /* cache1 DMA control: disable some stuff */
1001 ACCW(PF_CACH1_DMAC, 0x00000000);
1002 /* cache1 engine 0 upto/including 7 is software (could also be graphics or DVD) */
1003 ACCW(PF_CACH1_ENG, 0x00000000);
1004 /* cache1 DMA fetch: trigger at 128 bytes, size is 32 bytes, max requests is 15,
1005 * use little endian */
1006 ACCW(PF_CACH1_DMAF, 0x000f0078);
1007 /* cache1 DMA push: b0 = 1: access is enabled */
1008 ACCW(PF_CACH1_DMAS, 0x00000001);
1009 /* cache1 push0 access enabled */
1010 ACCW(PF_CACH1_PSH0, 0x00000001);
1011 /* cache1 pull0 access enabled */
1012 ACCW(PF_CACH1_PUL0, 0x00000001);
1013 /* cache1 pull1 engine = acceleration engine (graphics) */
1014 ACCW(PF_CACH1_PUL1, 0x00000001);
1015 /* enable PFIFO caches reassign */
1016 ACCW(PF_CACHES, 0x00000001);
1017
1018 /* setup 3D specifics */
1019 nv_init_for_3D_dma();
1020
1021 /*** init acceleration engine command info ***/
1022 /* set object handles */
1023 /* note:
1024 * probably depending on some other setup, there are 8 or 32 FIFO channels
1025 * available. Assuming the current setup only has 8 channels because the 'rest'
1026 * isn't setup here... */
1027 si->engine.fifo.handle[0] = NV_ROP5_SOLID;
1028 si->engine.fifo.handle[1] = NV_IMAGE_BLACK_RECTANGLE;
1029 si->engine.fifo.handle[2] = NV_IMAGE_PATTERN;
1030 si->engine.fifo.handle[3] = NV4_SURFACE; /* NV10_CONTEXT_SURFACES_2D is identical */
1031 si->engine.fifo.handle[4] = NV_IMAGE_BLIT;
1032 si->engine.fifo.handle[5] = NV4_GDI_RECTANGLE_TEXT;
1033 si->engine.fifo.handle[6] = NV4_CONTEXT_SURFACES_ARGB_ZS;//NV1_RENDER_SOLID_LIN;
1034 si->engine.fifo.handle[7] = NV4_DX5_TEXTURE_TRIANGLE;
1035 /* preset no FIFO channels assigned to cmd's */
1036 for (cnt = 0; cnt < 0x20; cnt++)
1037 {
1038 si->engine.fifo.ch_ptr[cnt] = 0;
1039 }
1040 /* set handle's pointers to their assigned FIFO channels */
1041 /* note:
1042 * b0-1 aren't used as adressbits. Using b0 to indicate a valid pointer. */
1043 for (cnt = 0; cnt < 0x08; cnt++)
1044 {
1045 si->engine.fifo.ch_ptr[(si->engine.fifo.handle[cnt])]
1046 = (0x00000001 + (cnt * 0x00002000));
1047 }
1048
1049 /*** init DMA command buffer info ***/
1050 if (si->ps.card_arch >= NV40A) //main mem DMA buf on pre-NV40
1051 {
1052 si->dma_buffer = (void *)((char *)si->framebuffer
1053 + ((si->ps.memory_size - 1) & 0xffff8000));
1054 }
1055 LOG(4, ("ACC_DMA: command buffer is at adress $%p\n", si->dma_buffer));
1056 /* we have issued no DMA cmd's to the engine yet */
1057 si->engine.dma.put = 0;
1058 /* the current first free adress in the DMA buffer is at offset 0 */
1059 si->engine.dma.current = 0;
1060 /* the DMA buffer can hold 8k 32-bit words (it's 32kb in size),
1061 * or 256k 32-bit words (1Mb in size) dependant on architecture (for now) */
1062 /* note:
1063 * one word is reserved at the end of the DMA buffer to be able to instruct the
1064 * engine to do a buffer wrap-around!
1065 * (DMA opcode 'noninc method': issue word $20000000.) */
1066 if (si->ps.card_arch < NV40A)
1067 si->engine.dma.max = ((1 * 1024 * 1024) >> 2) - 1;
1068 else
1069 si->engine.dma.max = 8192 - 1;
1070 /* note the current free space we have left in the DMA buffer */
1071 si->engine.dma.free = si->engine.dma.max - si->engine.dma.current;
1072
1073 /*** init FIFO via DMA command buffer. ***/
1074 /* wait for room in fifo for new FIFO assigment cmds if needed: */
1075 if (si->ps.card_arch >= NV40A)
1076 {
1077 if (nv_acc_fifofree_dma(12) != B_OK) return B_ERROR;
1078 } else {
1079 if (nv_acc_fifofree_dma(16) != B_OK) return B_ERROR;
1080 }
1081
1082 /* program new FIFO assignments */
1083 /* Raster OPeration: */
1084 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH0, si->engine.fifo.handle[0]);
1085 /* Clip: */
1086 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH1, si->engine.fifo.handle[1]);
1087 /* Pattern: */
1088 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH2, si->engine.fifo.handle[2]);
1089 /* 2D Surfaces: */
1090 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH3, si->engine.fifo.handle[3]);
1091 /* Blit: */
1092 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH4, si->engine.fifo.handle[4]);
1093 /* Bitmap: */
1094 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH5, si->engine.fifo.handle[5]);
1095 if (si->ps.card_arch < NV40A)
1096 {
1097 /* 3D surfaces: (3D related only) */
1098 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH6, si->engine.fifo.handle[6]);
1099 /* Textured Triangle: (3D only) */
1100 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH7, si->engine.fifo.handle[7]);
1101 }
1102
1103 /*** Set pixel width ***/
1104 switch(si->dm.space)
1105 {
1106 case B_CMAP8:
1107 surf_depth = 0x00000001;
1108 cmd_depth = 0x00000003;
1109 break;
1110 case B_RGB15_LITTLE:
1111 case B_RGB16_LITTLE:
1112 surf_depth = 0x00000004;
1113 cmd_depth = 0x00000001;
1114 break;
1115 case B_RGB32_LITTLE:
1116 case B_RGBA32_LITTLE:
1117 surf_depth = 0x00000006;
1118 cmd_depth = 0x00000003;
1119 break;
1120 default:
1121 LOG(8,("ACC_DMA: init, invalid bit depth\n"));
1122 return B_ERROR;
1123 }
1124
1125 /* wait for room in fifo for surface setup cmd if needed */
1126 if (nv_acc_fifofree_dma(5) != B_OK) return B_ERROR;
1127 /* now setup 2D surface (writing 5 32bit words) */
1128 nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 4);
1129 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = surf_depth; /* Format */
1130 /* setup screen pitch */
1131 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] =
1132 ((si->fbc.bytes_per_row & 0x0000ffff) | (si->fbc.bytes_per_row << 16)); /* Pitch */
1133 /* setup screen location */
1134 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] =
1135 ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer); /* OffsetSource */
1136 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] =
1137 ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer); /* OffsetDest */
1138
1139 /* wait for room in fifo for pattern colordepth setup cmd if needed */
1140 if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR;
1141 /* set pattern colordepth (writing 2 32bit words) */
1142 nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETCOLORFORMAT, 1);
1143 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */
1144
1145 /* wait for room in fifo for bitmap colordepth setup cmd if needed */
1146 if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR;
1147 /* set bitmap colordepth (writing 2 32bit words) */
1148 nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_SETCOLORFORMAT, 1);
1149 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */
1150
1151 /* Load our pattern into the engine: */
1152 /* wait for room in fifo for pattern cmd if needed. */
1153 if (nv_acc_fifofree_dma(7) != B_OK) return B_ERROR;
1154 /* now setup pattern (writing 7 32bit words) */
1155 nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETSHAPE, 1);
1156 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x00000000; /* SetShape: 0 = 8x8, 1 = 64x1, 2 = 1x64 */
1157 nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETCOLOR0, 4);
1158 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetColor0 */
1159 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetColor1 */
1160 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetPattern[0] */
1161 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0xffffffff; /* SetPattern[1] */
1162
1163 /* tell the engine to fetch and execute all (new) commands in the DMA buffer */
1164 nv_start_dma();
1165
1166 return B_OK;
1167 }
1168
/* Prepare the acceleration engine for 3D use.
 *
 * On cards with card_arch >= NV10A this writes a fixed set of values to a range
 * of (mostly undocumented) PGRAPH registers, opens up the eight window clipping
 * rectangles to their maximum range and loads the transform/lighting 'pipe' with
 * initial values. On older architectures the function does nothing.
 *
 * Takes no arguments and returns nothing; all state is programmed through the
 * ACCW() register write macro.
 *
 * NOTE(review): the sequence below is order dependent: every NV10_PIPEADR write
 * selects a start address and the following NV10_PIPEDAT writes are presumably
 * stored at consecutive pipe addresses (auto-increment), so do not reorder.
 * NOTE(review): many pipe values are valid IEEE-754 single precision bit patterns
 * (0x3f800000 = 1.0, 0x40000000 = 2.0, 0xbf800000 = -1.0, 0xc5000000 = -2048.0,
 * 0xc4fff000 = -2047.5, 0x4b7fffff = 16777215.0, 0x7149f2ca = approx. 1e30);
 * presumably the pipe interprets them as floats -- confirm against hardware docs. */
static void nv_init_for_3D_dma(void)
{
	/* setup PGRAPH unknown registers and modify (pre-cleared) pipe stuff for 3D use */
	if (si->ps.card_arch >= NV10A)
	{
		/* setup unknown PGRAPH stuff */
		ACCW(PGWHAT_00, 0x00000000);
		ACCW(PGWHAT_01, 0x00000000);
		ACCW(PGWHAT_02, 0x00000000);
		ACCW(PGWHAT_03, 0x00000000);

		ACCW(PGWHAT_04, 0x00001000);
		ACCW(PGWHAT_05, 0x00001000);
		ACCW(PGWHAT_06, 0x4003ff80);

		ACCW(PGWHAT_07, 0x00000000);
		ACCW(PGWHAT_08, 0x00000000);
		ACCW(PGWHAT_09, 0x00000000);
		ACCW(PGWHAT_0A, 0x00000000);
		ACCW(PGWHAT_0B, 0x00000000);

		ACCW(PGWHAT_0C, 0x00080008);
		ACCW(PGWHAT_0D, 0x00080008);

		ACCW(PGWHAT_0E, 0x00000000);
		ACCW(PGWHAT_0F, 0x00000000);
		ACCW(PGWHAT_10, 0x00000000);
		ACCW(PGWHAT_11, 0x00000000);
		ACCW(PGWHAT_12, 0x00000000);
		ACCW(PGWHAT_13, 0x00000000);
		ACCW(PGWHAT_14, 0x00000000);
		ACCW(PGWHAT_15, 0x00000000);
		ACCW(PGWHAT_16, 0x00000000);
		ACCW(PGWHAT_17, 0x00000000);
		ACCW(PGWHAT_18, 0x00000000);

		ACCW(PGWHAT_19, 0x10000000);

		ACCW(PGWHAT_1A, 0x00000000);
		ACCW(PGWHAT_1B, 0x00000000);
		ACCW(PGWHAT_1C, 0x00000000);
		ACCW(PGWHAT_1D, 0x00000000);
		ACCW(PGWHAT_1E, 0x00000000);
		ACCW(PGWHAT_1F, 0x00000000);
		ACCW(PGWHAT_20, 0x00000000);
		ACCW(PGWHAT_21, 0x00000000);

		ACCW(PGWHAT_22, 0x08000000);

		ACCW(PGWHAT_23, 0x00000000);
		ACCW(PGWHAT_24, 0x00000000);
		ACCW(PGWHAT_25, 0x00000000);
		ACCW(PGWHAT_26, 0x00000000);

		ACCW(PGWHAT_27, 0x4b7fffff);

		ACCW(PGWHAT_28, 0x00000000);
		ACCW(PGWHAT_29, 0x00000000);
		ACCW(PGWHAT_2A, 0x00000000);

		/* setup window clipping */
		/* b0-11 = min; b16-27 = max.
		 * note:
		 * probably two's complement values, so setting to max range here:
		 * which would be -2048 upto/including +2047
		 * (min field = 0x800, max field = 0x7ff). */
		/* horizontal: all eight clip windows get the same range */
		ACCW(WINCLIP_H_0, 0x07ff0800);
		ACCW(WINCLIP_H_1, 0x07ff0800);
		ACCW(WINCLIP_H_2, 0x07ff0800);
		ACCW(WINCLIP_H_3, 0x07ff0800);
		ACCW(WINCLIP_H_4, 0x07ff0800);
		ACCW(WINCLIP_H_5, 0x07ff0800);
		ACCW(WINCLIP_H_6, 0x07ff0800);
		ACCW(WINCLIP_H_7, 0x07ff0800);
		/* vertical: all eight clip windows get the same range */
		ACCW(WINCLIP_V_0, 0x07ff0800);
		ACCW(WINCLIP_V_1, 0x07ff0800);
		ACCW(WINCLIP_V_2, 0x07ff0800);
		ACCW(WINCLIP_V_3, 0x07ff0800);
		ACCW(WINCLIP_V_4, 0x07ff0800);
		ACCW(WINCLIP_V_5, 0x07ff0800);
		ACCW(WINCLIP_V_6, 0x07ff0800);
		ACCW(WINCLIP_V_7, 0x07ff0800);

		/* setup (initialize) pipe:
		 * needed to get valid 3D rendering on (at least) NV1x cards. Without this
		 * those cards produce rubbish instead of 3D, although the engine itself keeps
		 * running and 2D stays OK. */

		/* set eyetype to local, lighting etc. is off while the pipe is loaded
		 * (lighting is switched on again after the pipe writes below) */
		ACCW(NV10_XFMOD0, 0x10000000);
		/* disable all lights */
		ACCW(NV10_XFMOD1, 0x00000000);

		/* note: upon writing data into the PIPEDAT register, the PIPEADR is
		 * probably auto-incremented! */
		/* (pipe address = b2-16, pipe data = b0-31) */
		/* note: pipe addresses IGRAPH registers! */
		ACCW(NV10_PIPEADR, 0x00006740);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x3f800000);

		ACCW(NV10_PIPEADR, 0x00006750);
		ACCW(NV10_PIPEDAT, 0x40000000);
		ACCW(NV10_PIPEDAT, 0x40000000);
		ACCW(NV10_PIPEDAT, 0x40000000);
		ACCW(NV10_PIPEDAT, 0x40000000);

		ACCW(NV10_PIPEADR, 0x00006760);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x00006770);
		ACCW(NV10_PIPEDAT, 0xc5000000);
		ACCW(NV10_PIPEDAT, 0xc5000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x00006780);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x000067a0);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x3f800000);

		/* the next two groups load three words each instead of four */
		ACCW(NV10_PIPEADR, 0x00006ab0);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x3f800000);

		ACCW(NV10_PIPEADR, 0x00006ac0);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x00006c10);
		ACCW(NV10_PIPEDAT, 0xbf800000);

		/* eight single-word entries, 0x10 apart, all loaded with the same value */
		ACCW(NV10_PIPEADR, 0x00007030);
		ACCW(NV10_PIPEDAT, 0x7149f2ca);

		ACCW(NV10_PIPEADR, 0x00007040);
		ACCW(NV10_PIPEDAT, 0x7149f2ca);

		ACCW(NV10_PIPEADR, 0x00007050);
		ACCW(NV10_PIPEDAT, 0x7149f2ca);

		ACCW(NV10_PIPEADR, 0x00007060);
		ACCW(NV10_PIPEDAT, 0x7149f2ca);

		ACCW(NV10_PIPEADR, 0x00007070);
		ACCW(NV10_PIPEDAT, 0x7149f2ca);

		ACCW(NV10_PIPEADR, 0x00007080);
		ACCW(NV10_PIPEDAT, 0x7149f2ca);

		ACCW(NV10_PIPEADR, 0x00007090);
		ACCW(NV10_PIPEDAT, 0x7149f2ca);

		ACCW(NV10_PIPEADR, 0x000070a0);
		ACCW(NV10_PIPEDAT, 0x7149f2ca);

		ACCW(NV10_PIPEADR, 0x00006a80);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x3f800000);

		ACCW(NV10_PIPEADR, 0x00006aa0);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		/* select primitive type that will be drawn (tri's) */
		ACCW(NV10_PIPEADR, 0x00000040);
		ACCW(NV10_PIPEDAT, 0x00000005);

		ACCW(NV10_PIPEADR, 0x00006400);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x4b7fffff);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x00006410);
		ACCW(NV10_PIPEDAT, 0xc5000000);
		ACCW(NV10_PIPEDAT, 0xc5000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x00006420);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x00006430);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x000064c0);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x3f800000);
		ACCW(NV10_PIPEDAT, 0x477fffff);
		ACCW(NV10_PIPEDAT, 0x3f800000);

		ACCW(NV10_PIPEADR, 0x000064d0);
		ACCW(NV10_PIPEDAT, 0xc5000000);
		ACCW(NV10_PIPEDAT, 0xc5000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x000064e0);
		ACCW(NV10_PIPEDAT, 0xc4fff000);
		ACCW(NV10_PIPEDAT, 0xc4fff000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		ACCW(NV10_PIPEADR, 0x000064f0);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);
		ACCW(NV10_PIPEDAT, 0x00000000);

		/* turn lighting on */
		ACCW(NV10_XFMOD0, 0x30000000);
		/* set light 1 to infinite type, other lights remain off */
		ACCW(NV10_XFMOD1, 0x00000004);

		/* Z-buffer state is:
		 * initialized, set to: 'fixed point' (integer?); Z-buffer; 16bits depth */
		/* note:
		 * other options possible are: floating point; 24bits depth; W-buffer */
		ACCW(GLOB_STAT_0, 0x10000000);
		/* set DMA instance 2 and 3 to be invalid */
		ACCW(GLOB_STAT_1, 0x00000000);
	}
}
1416
nv_start_dma(void)1417 static void nv_start_dma(void)
1418 {
1419 uint32 dummy;
1420
1421 if (si->engine.dma.current != si->engine.dma.put)
1422 {
1423 si->engine.dma.put = si->engine.dma.current;
1424 /* flush used caches so we know for sure the DMA cmd buffer received all data. */
1425 if (si->ps.card_arch < NV40A)
1426 {
1427 /* some CPU's support out-of-order processing (WinChip/Cyrix). Flush them. */
1428 __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory");
1429 /* read a non-cached adress to flush the cash */
1430 dummy = ACCR(STATUS);
1431 }
1432 else
1433 {
1434 /* dummy read the first adress of the framebuffer to flush MTRR-WC buffers */
1435 dummy = *((volatile uint32 *)(si->framebuffer));
1436 }
1437
1438 /* actually start DMA to execute all commands now in buffer */
1439 /* note:
1440 * it doesn't matter which FIFO channel's DMA registers we access, they are in
1441 * fact all the same set. It also doesn't matter if the channel was assigned a
1442 * command or not. */
1443 /* note also:
1444 * NV_GENERAL_DMAPUT is a write-only register on some cards (confirmed NV11). */
1445 NV_REG32(NVACC_FIFO + NV_GENERAL_DMAPUT) = (si->engine.dma.put << 2);
1446 }
1447 }
1448
1449 /* this routine does not check the engine's internal hardware FIFO, but the DMA
1450 * command buffer. You can see this as a FIFO as well, that feeds the hardware FIFO.
1451 * The hardware FIFO state is checked by the DMA hardware automatically. */
nv_acc_fifofree_dma(uint16 cmd_size)1452 static status_t nv_acc_fifofree_dma(uint16 cmd_size)
1453 {
1454 uint32 dmaget;
1455
1456 /* we'd better check for timeouts on the DMA engine as it's theoretically
1457 * breakable by malfunctioning software */
1458 uint16 cnt = 0;
1459
1460 /* check if the DMA buffer has enough room for the command.
1461 * note:
1462 * engine.dma.free is 'cached' */
1463 while ((si->engine.dma.free < cmd_size) && (cnt < 10000) && (err < 3))
1464 {
1465 /* see where the engine is currently fetching from the buffer */
1466 /* note:
1467 * read this only once in the code as accessing registers is relatively slow */
1468 /* note also:
1469 * it doesn't matter which FIFO channel's DMA registers we access, they are in
1470 * fact all the same set. It also doesn't matter if the channel was assigned a
1471 * command or not. */
1472 dmaget = ((NV_REG32(NVACC_FIFO + NV_GENERAL_DMAGET)) >> 2);
1473
1474 /* update timeout counter: on NV11 on a Pentium4 2.8Ghz max reached count
1475 * using BeRoMeter 1.2.6 was about 600; so counting 10000 before generating
1476 * a timeout should definately do it. Snooze()-ing cannot be done without a
1477 * serious speed penalty, even if done for only 1 microSecond. */
1478 cnt++;
1479
1480 /* where's the engine fetching viewed from us issuing? */
1481 if (si->engine.dma.put >= dmaget)
1482 {
1483 /* engine is fetching 'behind us', the last piece of the buffer is free */
1484
1485 /* note the 'updated' free space we have in the DMA buffer */
1486 si->engine.dma.free = si->engine.dma.max - si->engine.dma.current;
1487 /* if it's enough after all we exit this routine immediately. Else: */
1488 if (si->engine.dma.free < cmd_size)
1489 {
1490 /* not enough room left, so instruct DMA engine to reset the buffer
1491 * when it's reaching the end of it */
1492 ((uint32*)(si->dma_buffer))[si->engine.dma.current++] = 0x20000000;
1493 /* reset our buffer pointer, so new commands will be placed at the
1494 * beginning of the buffer. */
1495 si->engine.dma.current = 0;
1496 /* tell the engine to fetch the remaining command(s) in the DMA buffer
1497 * that where not executed before. */
1498 nv_start_dma();
1499
1500 /* NOW the engine is fetching 'in front of us', so the first piece
1501 * of the buffer is free */
1502
1503 /* note the updated current free space we have in the DMA buffer */
1504 si->engine.dma.free = dmaget - si->engine.dma.current;
1505 /* mind this pittfall:
1506 * Leave some room between where the engine is fetching and where we
1507 * put new commands. Otherwise the engine will crash on heavy loads.
1508 * A crash can be forced best in 640x480x32 mode with BeRoMeter 1.2.6.
1509 * (confirmed on NV11 and NV43 with less than 256 words forced freespace.)
1510 * Note:
1511 * The engine is DMA triggered for fetching chunks every 128 bytes,
1512 * maybe this is the reason for this behaviour.
1513 * Note also:
1514 * it looks like the space that needs to be kept free is coupled
1515 * with the size of the DMA buffer. */
1516 if (si->engine.dma.free < 256)
1517 si->engine.dma.free = 0;
1518 else
1519 si->engine.dma.free -= 256;
1520 }
1521 }
1522 else
1523 {
1524 /* engine is fetching 'in front of us', so the first piece of the buffer
1525 * is free */
1526
1527 /* note the updated current free space we have in the DMA buffer */
1528 si->engine.dma.free = dmaget - si->engine.dma.current;
1529 /* mind this pittfall:
1530 * Leave some room between where the engine is fetching and where we
1531 * put new commands. Otherwise the engine will crash on heavy loads.
1532 * A crash can be forced best in 640x480x32 mode with BeRoMeter 1.2.6.
1533 * (confirmed on NV11 and NV43 with less than 256 words forced freespace.)
1534 * Note:
1535 * The engine is DMA triggered for fetching chunks every 128 bytes,
1536 * maybe this is the reason for this behaviour.
1537 * Note also:
1538 * it looks like the space that needs to be kept free is coupled
1539 * with the size of the DMA buffer. */
1540 if (si->engine.dma.free < 256)
1541 si->engine.dma.free = 0;
1542 else
1543 si->engine.dma.free -= 256;
1544 }
1545 }
1546
1547 /* log timeout if we had one */
1548 if (cnt == 10000)
1549 {
1550 if (err < 3) err++;
1551 LOG(4,("ACC_DMA: fifofree; DMA timeout #%d, engine trouble!\n", err));
1552 }
1553
1554 /* we must make the acceleration routines abort or the driver will hang! */
1555 if (err >= 3) return B_ERROR;
1556
1557 return B_OK;
1558 }
1559
/* Queue the header word of an engine command in the DMA command buffer.
 *
 * cmd:    engine command handle; used as index into engine.fifo.ch_ptr[] to find
 *         the FIFO channel the command is assigned to.
 * offset: offset of the first method (register) inside that FIFO channel.
 * size:   number of argument words the caller will append after this header.
 *
 * The caller has to make sure there is room for (size + 1) words first, see
 * nv_acc_fifofree_dma(). The cached free count is lowered by (size + 1) here, so
 * the argument words are accounted for already when the caller stores them.
 *
 * Header word layout (NV_FIFO_DMA_OPCODE):
 * number of cmd words in b18 - 28; FIFO offset of the first cmd word in b2 - 15;
 * DMA opcode in b29 - 31. A 'NOP' is the opcode word $00000000. */
/* note:
 * possible DMA opcodes (value of the 3-bit field b29 - 31):
 * 0 (b'000') is 'method' (execute cmd);
 * 1 (b'001') is 'jump';
 * 2 (b'010') is 'noninc method';
 * 3 (b'011') is 'call': return is executed by opcode word $00020000 (b17 = 1). */
/* NOTE(review):
 * the buffer wrap-around word used elsewhere in this file is $20000000, which has
 * only b29 set, i.e. field value 1 in the list above -- confirm which opcode name
 * belongs to the wrap-around. */
/* note also:
 * this system uses auto-increments for the FIFO offset addresses. Make sure
 * to set a new address if a gap exists between the previous one and the new one. */
static void nv_acc_cmd_dma(uint32 cmd, uint16 offset, uint16 size)
{
	/* shift as uint32: a uint16 is promoted to (signed) int, so shifting it by 18
	 * would be undefined as soon as the result does not fit in an int.
	 * The mask drops b0-1 of the channel pointer: b0 is only the 'valid' marker. */
	((uint32*)(si->dma_buffer))[si->engine.dma.current++] = (((uint32)size << 18) |
		((si->engine.fifo.ch_ptr[cmd] + offset) & 0x0000fffc));

	/* space left after issuing the current command is the cmd AND it's arguments less */
	si->engine.dma.free -= (size + 1);
}
1580
/* Queue a two-word 'assign FIFO channel' command in the DMA command buffer.
 *
 * ch:     FIFO channel offset to (re)assign (one of the NV_GENERAL_FIFO_CHx values).
 * handle: handle of the engine command the channel will serve from now on.
 *
 * The caller has to make sure there is room for two words first, see
 * nv_acc_fifofree_dma(). The cached free count is lowered by two here. */
static void nv_acc_set_ch_dma(uint16 ch, uint32 handle)
{
	uint32 *cmd_buf = (uint32*)(si->dma_buffer);
	/* header: one argument word follows (b18 = 1), addressed at the channel itself */
	const uint32 assign_cmd = ((1 << 18) | ch);
	/* argument: the new handle with b31 set */
	const uint32 assign_arg = (0x80000000 | handle);

	cmd_buf[si->engine.dma.current++] = assign_cmd;
	cmd_buf[si->engine.dma.current++] = assign_arg;

	/* both words are used up now */
	si->engine.dma.free -= 2;
}
1591
1592 /* note:
1593 * switching fifo channel assignments this way has no noticable slowdown:
1594 * measured 0.2% with Quake2. */
nv_acc_assert_fifo_dma(void)1595 void nv_acc_assert_fifo_dma(void)
1596 {
1597 /* does every engine cmd this accelerant needs have a FIFO channel? */
1598 //fixme: can probably be optimized for both speed and channel selection...
1599 if (!si->engine.fifo.ch_ptr[NV_ROP5_SOLID] ||
1600 !si->engine.fifo.ch_ptr[NV_IMAGE_BLACK_RECTANGLE] ||
1601 !si->engine.fifo.ch_ptr[NV_IMAGE_PATTERN] ||
1602 !si->engine.fifo.ch_ptr[NV4_SURFACE] ||
1603 !si->engine.fifo.ch_ptr[NV_IMAGE_BLIT] ||
1604 !si->engine.fifo.ch_ptr[NV4_GDI_RECTANGLE_TEXT] ||
1605 !si->engine.fifo.ch_ptr[NV_SCALED_IMAGE_FROM_MEMORY])
1606 {
1607 uint16 cnt;
1608
1609 /* free the FIFO channels we want from the currently assigned cmd's */
1610 si->engine.fifo.ch_ptr[si->engine.fifo.handle[0]] = 0;
1611 si->engine.fifo.ch_ptr[si->engine.fifo.handle[1]] = 0;
1612 si->engine.fifo.ch_ptr[si->engine.fifo.handle[2]] = 0;
1613 si->engine.fifo.ch_ptr[si->engine.fifo.handle[3]] = 0;
1614 si->engine.fifo.ch_ptr[si->engine.fifo.handle[4]] = 0;
1615 si->engine.fifo.ch_ptr[si->engine.fifo.handle[5]] = 0;
1616 si->engine.fifo.ch_ptr[si->engine.fifo.handle[6]] = 0;
1617
1618 /* set new object handles */
1619 si->engine.fifo.handle[0] = NV_ROP5_SOLID;
1620 si->engine.fifo.handle[1] = NV_IMAGE_BLACK_RECTANGLE;
1621 si->engine.fifo.handle[2] = NV_IMAGE_PATTERN;
1622 si->engine.fifo.handle[3] = NV4_SURFACE;
1623 si->engine.fifo.handle[4] = NV_IMAGE_BLIT;
1624 si->engine.fifo.handle[5] = NV4_GDI_RECTANGLE_TEXT;
1625 si->engine.fifo.handle[6] = NV_SCALED_IMAGE_FROM_MEMORY;
1626
1627 /* set handle's pointers to their assigned FIFO channels */
1628 /* note:
1629 * b0-1 aren't used as adressbits. Using b0 to indicate a valid pointer. */
1630 for (cnt = 0; cnt < 0x08; cnt++)
1631 {
1632 si->engine.fifo.ch_ptr[(si->engine.fifo.handle[cnt])] =
1633 (0x00000001 + (cnt * 0x00002000));
1634 }
1635
1636 /* wait for room in fifo for new FIFO assigment cmds if needed. */
1637 if (nv_acc_fifofree_dma(14) != B_OK) return;
1638
1639 /* program new FIFO assignments */
1640 /* Raster OPeration: */
1641 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH0, si->engine.fifo.handle[0]);
1642 /* Clip: */
1643 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH1, si->engine.fifo.handle[1]);
1644 /* Pattern: */
1645 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH2, si->engine.fifo.handle[2]);
1646 /* 2D Surface: */
1647 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH3, si->engine.fifo.handle[3]);
1648 /* Blit: */
1649 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH4, si->engine.fifo.handle[4]);
1650 /* Bitmap: */
1651 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH5, si->engine.fifo.handle[5]);
1652 /* Scaled and fitered Blit: */
1653 nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH6, si->engine.fifo.handle[6]);
1654
1655 /* tell the engine to fetch and execute all (new) commands in the DMA buffer */
1656 nv_start_dma();
1657 }
1658 }
1659
/*
	note:
	moved acceleration 'top-level' routines to be integrated in the engine:
	it is costly to call the engine for every single function within a loop!
	(measured with BeRoMeter 1.2.6: up to 15% speed increase on all CPUs.)

	note also:
	splitting up each command list into sublists (see routines below) prevents
	a lot more nested calls, further increasing the speed by up to 70%.

	finally:
	sending the sublist to just one single engine command increases speed even
	further, by up to another 10%. This can't be done for blits though, as this
	engine command's hardware does not support multiple objects.
*/
1675
1676 /* screen to screen blit - i.e. move windows around and scroll within them. */
/* Queue 'count' unscaled blits within the frame buffer.
 * et:    engine token (not used by this routine);
 * list:  the blits to do; width and height are used 'plus one';
 * count: number of entries in list.
 * The routine returns early, without further drawing, as soon as no room can
 * be had in the DMA buffer. */
void SCREEN_TO_SCREEN_BLIT_DMA(engine_token *et, blit_params *list, uint32 count)
{
	uint32 *dma = (uint32*)(si->dma_buffer);
	blit_params *blit = list;
	uint16 batch;
	uint16 todo;

	/*** prepare the engine: select GXcopy as raster operation ***/
	/* the ROP command plus its argument take two words */
	if (nv_acc_fifofree_dma(2) != B_OK) return;
	nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1);
	dma[si->engine.dma.current++] = 0xcc; /* SetRop5 */

	/*** issue the blits, at most 32 per batch ***/
	/* Note:
	 * blit-copy direction is determined inside nvidia hardware: no setup needed */
	for (; count != 0; count -= batch)
	{
		/* batches keep the number of calls low, while long lists still get
		 * executed without trouble */
		batch = (count < 32) ? count : 32;

		/* every blit costs four words: make sure the whole batch fits */
		if (nv_acc_fifofree_dma(4 * batch) != B_OK) return;

		for (todo = batch; todo != 0; todo--, blit++)
		{
			/* one command word followed by three arguments */
			nv_acc_cmd_dma(NV_IMAGE_BLIT, NV_IMAGE_BLIT_SOURCEORG, 3);
			dma[si->engine.dma.current++] =
				(((blit->src_top) << 16) | (blit->src_left)); /* SourceOrg */
			dma[si->engine.dma.current++] =
				(((blit->dest_top) << 16) | (blit->dest_left)); /* DestOrg */
			dma[si->engine.dma.current++] =
				((((blit->height) + 1) << 16) | ((blit->width) + 1)); /* HeightWidth */
		}

		/* have the engine fetch the commands of this batch */
		nv_start_dma();
	}

	/* tell 3D add-ons that they should reload their rendering states and surfaces */
	si->engine.threeD.reload = 0xffffffff;
}
1726
1727 /* scaled and filtered screen to screen blit - i.e. video playback without overlay */
1728 /* note: source and destination may not overlap. */
1729 //fixme? checkout NV5 and NV10 version of cmd: faster?? (or is 0x77 a 'autoselect' version?)
/* Queue 'count' scaled, bilinear filtered blits within the frame buffer.
 * et:    engine token (not used by this routine);
 * list:  the blits to do; all width and height fields are used 'plus one';
 * count: number of entries in list.
 * The routine returns early, without (further) drawing, if the current
 * colorspace is not supported or if no room can be had in the DMA buffer.
 * note:
 * all size and scale math is done in uint32. The list fields are presumably
 * uint16 (confirm against the accelerant headers) and would otherwise be
 * promoted to signed int: '(size + 1) << 20' then overflows into the sign bit
 * for sources of 2048 pixels and up, and the signed division that follows
 * returns a bogus scaling factor. Sources of 4096 pixels and up still do not
 * fit the 32bit intermediate. */
void SCREEN_TO_SCREEN_SCALED_FILTERED_BLIT_DMA(engine_token *et, scaled_blit_params *list, uint32 count)
{
	uint32 *dma = (uint32*)(si->dma_buffer);
	uint32 i = 0;
	uint16 subcnt;
	uint32 cmd_depth;
	uint8 bpp;

	/*** init acc engine for scaled filtered blit function ***/
	/* Set pixel width: cmd_depth is the source format handed to the engine
	 * command, bpp the number of bytes per pixel used for the source offset. */
	switch(si->dm.space)
	{
	case B_RGB15_LITTLE:
		cmd_depth = 0x00000002;
		bpp = 2;
		break;
	case B_RGB16_LITTLE:
		cmd_depth = 0x00000007;
		bpp = 2;
		break;
	case B_RGB32_LITTLE:
	case B_RGBA32_LITTLE:
		cmd_depth = 0x00000004;
		bpp = 4;
		break;
	/* fixme sometime:
	 * we could do the spaces below if this function would be modified to be able
	 * to use a source outside of the desktop, i.e. using offscreen bitmaps... */
	case B_YCbCr422:
		cmd_depth = 0x00000005;
		bpp = 2;
		break;
	case B_YUV422:
		cmd_depth = 0x00000006;
		bpp = 2;
		break;
	default:
		/* note: this function does not support src or dest in the B_CMAP8 space! */
		//fixme: the NV10 version of this cmd supports B_CMAP8 src though... (checkout)
		LOG(8,("ACC_DMA: scaled_filtered_blit, invalid bit depth\n"));
		return;
	}

	/* modify surface depth settings for 15-bit colorspace so command works as intended */
	if (si->dm.space == B_RGB15_LITTLE)
	{
		/* wait for room in fifo for surface setup cmd if needed */
		if (nv_acc_fifofree_dma(2) != B_OK) return;
		/* now setup 2D surface (writing 1 32bit word) */
		nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 1);
		dma[si->engine.dma.current++] = 0x00000002; /* Format */
	}

	/* TNT1 has fixed operation mode 'SRCcopy' while the rest can be programmed: */
	if (si->ps.card_type != NV04)
	{
		/* wait for room in fifo for cmds if needed:
		 * three words here plus two for the fill color below. */
		if (nv_acc_fifofree_dma(5) != B_OK) return;
		/* now setup source bitmap colorspace */
		nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SETCOLORFORMAT, 2);
		dma[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */
		/* now setup operation mode to SRCcopy */
		dma[si->engine.dma.current++] = 0x00000003; /* SetOperation */
	}
	else
	{
		/* wait for room in fifo for cmd if needed:
		 * two words here plus two for the fill color below. */
		if (nv_acc_fifofree_dma(4) != B_OK) return;
		/* now setup source bitmap colorspace */
		nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SETCOLORFORMAT, 1);
		dma[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */
		/* TNT1 has fixed operation mode SRCcopy */
	}
	/* now setup fill color (writing 2 32bit words) */
	nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1);
	dma[si->engine.dma.current++] = 0x00000000; /* Color1A */

	/*** do each blit ***/
	while (count)
	{
		/* break up the list in sublists to minimize calls, while making sure long
		 * lists still get executed without trouble */
		subcnt = 16;
		if (count < 16) subcnt = count;
		count -= subcnt;

		/* wait for room in fifo for blit cmd if needed (12 words per blit). */
		if (nv_acc_fifofree_dma(12 * subcnt) != B_OK) return;

		while (subcnt--)
		{
			/* real sizes in pixels, kept unsigned so the shifts below are safe */
			const uint32 src_w = (uint32)list[i].src_width + 1;
			const uint32 src_h = (uint32)list[i].src_height + 1;
			const uint32 dest_w = (uint32)list[i].dest_width + 1;
			const uint32 dest_h = (uint32)list[i].dest_height + 1;

			/* now setup blit (writing 12 32bit words): first 7 of them */
			nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCEORG, 6);
			/* setup dest clipping ref for blit (not used) (b0-15 = left, b16-31 = top) */
			dma[si->engine.dma.current++] = 0; /* SourceOrg */
			/* setup dest clipping size for blit */
			dma[si->engine.dma.current++] =
				((dest_h << 16) | dest_w); /* SourceHeightWidth */
			/* setup destination location and size for blit */
			dma[si->engine.dma.current++] =
				(((uint32)list[i].dest_top << 16) | (uint32)list[i].dest_left); /* DestOrg */
			dma[si->engine.dma.current++] =
				((dest_h << 16) | dest_w); /* DestHeightWidth */
			//fixme: findout scaling limits... (although the current cmd interface doesn't support them.)
			dma[si->engine.dma.current++] =
				((src_w << 20) / dest_w); /* HorInvScale (in 12.20 format) */
			dma[si->engine.dma.current++] =
				((src_h << 20) / dest_h); /* VerInvScale (in 12.20 format) */

			/* the remaining 5 words */
			nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCESIZE, 4);
			/* setup horizontal and vertical source (fetching) ends.
			 * note:
			 * horizontal granularity is 2 pixels, vertical granularity is 1 pixel:
			 * the width is rounded up to the next even number.
			 * look at Matrox or Neomagic bes engines code for usage example. */
			//fixme: tested 15, 16 and 32-bit RGB depth, verify other depths...
			dma[si->engine.dma.current++] =
				((src_h << 16) | ((src_w + 0x0001) & ~(uint32)0x0001)); /* SourceHeightWidth */
			/* setup source pitch (b0-15). Set 'format origin center' (b16-17) and
			 * select 'format interpolator foh (bilinear filtering)' (b24). */
			dma[si->engine.dma.current++] =
				(si->fbc.bytes_per_row | (1 << 16) | (1 << 24)); /* SourcePitch */
			/* setup source surface location: byte offset of the first source
			 * pixel relative to the start of the card's frame buffer */
			dma[si->engine.dma.current++] =
				((uint32)((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer)) +
				(list[i].src_top * si->fbc.bytes_per_row) + (list[i].src_left * bpp); /* Offset */
			/* setup source start: first (sub)pixel contributing to output picture */
			/* note:
			 * clipping is not asked for.
			 * look at nVidia NV10+ bes engine code for usage example. */
			dma[si->engine.dma.current++] =
				0; /* SourceRef (b0-15 = hor, b16-31 = ver: both in 12.4 format) */

			i++;
		}

		/* tell the engine to fetch the commands in the DMA buffer that were not
		 * executed before. */
		nv_start_dma();
	}

	/* reset surface depth settings so the other engine commands work as intended */
	if (si->dm.space == B_RGB15_LITTLE)
	{
		/* wait for room in fifo for surface setup cmd if needed */
		if (nv_acc_fifofree_dma(2) != B_OK) return;
		/* now setup 2D surface (writing 1 32bit word) */
		nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 1);
		dma[si->engine.dma.current++] = 0x00000004; /* Format */

		/* tell the engine to fetch the commands in the DMA buffer that were not
		 * executed before. */
		nv_start_dma();
	}

	/* tell 3D add-ons that they should reload their rendering states and surfaces */
	si->engine.threeD.reload = 0xffffffff;
}
1887
1888
/* scaled and filtered offscreen to screen blit - i.e. video playback without
 * overlay, with the source bitmap described by an offscreen buffer config. */
/* note: source and destination may not overlap. */
// FIXME? checkout NV5 and NV10 version of cmd: faster?? (or is 0x77 a 'autoselect' version?)
/* Queue 'count' scaled, bilinear filtered blits from an offscreen buffer to the
 * screen, each of them drawn once per destination clipping rectangle.
 * et:     engine token (not used by this routine);
 * config: describes the source: buffer address, colorspace and bytes per row;
 * list:   the blits to do; all width and height fields are used 'plus one';
 * count:  number of entries in list.
 * The routine returns early, without (further) drawing, if the source
 * colorspace is not supported or if no room can be had in the DMA buffer.
 * note:
 * all size and scale math is done in uint32. The list fields are presumably
 * uint16 (confirm against the accelerant headers) and would otherwise be
 * promoted to signed int: '(size + 1) << 20' then overflows into the sign bit
 * for sources of 2048 pixels and up, and the signed division that follows
 * returns a bogus scaling factor. Sources of 4096 pixels and up still do not
 * fit the 32bit intermediate. */
void OFFSCREEN_TO_SCREEN_SCALED_FILTERED_BLIT_DMA(
	engine_token *et, offscreen_buffer_config *config, clipped_scaled_blit_params *list, uint32 count)
{
	uint32 *dma = (uint32*)(si->dma_buffer);
	uint32 i = 0;
	uint32 cmd_depth;
	uint8 bpp;

	LOG(4, ("ACC_DMA: offscreen src buffer location $%p\n",
		(uint8*)(config->buffer)));

	/*** init acc engine for scaled filtered blit function ***/
	/* Set pixel width: cmd_depth is the source format handed to the engine
	 * command, bpp the number of bytes per pixel used for the source offset.
	 * Both follow from the colorspace of the offscreen source buffer. */
	switch (config->space)
	{
	case B_RGB15_LITTLE:
		cmd_depth = 0x00000002;
		bpp = 2;
		break;
	case B_RGB16_LITTLE:
		cmd_depth = 0x00000007;
		bpp = 2;
		break;
	case B_RGB32_LITTLE:
	case B_RGBA32_LITTLE:
		cmd_depth = 0x00000004;
		bpp = 4;
		break;
	/* the two spaces below are accepted for the offscreen source as well
	 * (see the 'verify other depths' fixme further down) */
	case B_YCbCr422:
		cmd_depth = 0x00000005;
		bpp = 2;
		break;
	case B_YUV422:
		cmd_depth = 0x00000006;
		bpp = 2;
		break;
	default:
		/* note: this function does not support src or dest in the B_CMAP8 space! */
		//fixme: the NV10 version of this cmd supports B_CMAP8 src though... (checkout)
		LOG(8,("ACC_DMA: scaled_filtered_blit, invalid bit depth\n"));
		return;
	}

	/* modify surface depth settings for 15-bit colorspace so command works as
	 * intended: this concerns the destination, so the screen's space is tested */
	if (si->dm.space == B_RGB15_LITTLE)
	{
		/* wait for room in fifo for surface setup cmd if needed */
		if (nv_acc_fifofree_dma(2) != B_OK) return;
		/* now setup 2D surface (writing 1 32bit word) */
		nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 1);
		dma[si->engine.dma.current++] = 0x00000002; /* Format */
	}

	/* TNT1 has fixed operation mode 'SRCcopy' while the rest can be programmed: */
	if (si->ps.card_type != NV04)
	{
		/* wait for room in fifo for cmds if needed:
		 * three words here plus two for the fill color below. */
		if (nv_acc_fifofree_dma(5) != B_OK) return;
		/* now setup source bitmap colorspace */
		nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SETCOLORFORMAT, 2);
		dma[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */
		/* now setup operation mode to SRCcopy */
		dma[si->engine.dma.current++] = 0x00000003; /* SetOperation */
	}
	else
	{
		/* wait for room in fifo for cmd if needed:
		 * two words here plus two for the fill color below. */
		if (nv_acc_fifofree_dma(4) != B_OK) return;
		/* now setup source bitmap colorspace */
		nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SETCOLORFORMAT, 1);
		dma[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */
		/* TNT1 has fixed operation mode SRCcopy */
	}
	/* now setup fill color (writing 2 32bit words) */
	nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1);
	dma[si->engine.dma.current++] = 0x00000000; /* Color1A */

	/*** do each blit ***/
	while (count--)
	{
		uint32 j = 0;
		uint16 clipcnt = list[i].dest_clipcount;
		/* real sizes in pixels, kept unsigned so the shifts below are safe */
		const uint32 src_w = (uint32)list[i].src_width + 1;
		const uint32 src_h = (uint32)list[i].src_height + 1;
		const uint32 dest_w = (uint32)list[i].dest_width + 1;
		const uint32 dest_h = (uint32)list[i].dest_height + 1;

		LOG(4,("ACC_DMA: offscreen src left %d, top %d\n", list[i].src_left, list[i].src_top));
		LOG(4,("ACC_DMA: offscreen src width %d, height %d\n", list[i].src_width + 1, list[i].src_height + 1));
		LOG(4,("ACC_DMA: offscreen dest left %d, top %d\n", list[i].dest_left, list[i].dest_top));
		LOG(4,("ACC_DMA: offscreen dest width %d, height %d\n", list[i].dest_width + 1, list[i].dest_height + 1));

		/* wait for room in fifo for blit cmd if needed:
		 * 9 words of setup for the blit plus 5 words per clipping rectangle. */
		/* NOTE(review): the requested size is handed over as uint16, so it
		 * presumably gets truncated for clip lists beyond some 13000 entries -
		 * confirm if such lists can occur. */
		if (nv_acc_fifofree_dma(9 + (5 * clipcnt)) != B_OK) return;

		/* now setup blit (writing 9 32bit words): first 5 of them */
		nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCEORG + 8, 4);
		/* setup destination location and size for blit */
		dma[si->engine.dma.current++] =
			(((uint32)list[i].dest_top << 16) | (uint32)list[i].dest_left); /* DestTopLeftOutputRect */
		dma[si->engine.dma.current++] =
			((dest_h << 16) | dest_w); /* DestHeightWidthOutputRect */
		/* setup scaling */
		//fixme: findout scaling limits... (although the current cmd interface doesn't support them.)
		dma[si->engine.dma.current++] =
			((src_w << 20) / dest_w); /* HorInvScale (in 12.20 format) */
		dma[si->engine.dma.current++] =
			((src_h << 20) / dest_h); /* VerInvScale (in 12.20 format) */

		/* the remaining 4 words */
		nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCESIZE, 3);
		/* setup horizontal and vertical source (fetching) ends.
		 * note:
		 * horizontal granularity is 2 pixels, vertical granularity is 1 pixel:
		 * the width is rounded up to the next even number.
		 * look at Matrox or Neomagic bes engines code for usage example. */
		//fixme: tested 15, 16 and 32-bit RGB depth, verify other depths...
		dma[si->engine.dma.current++] =
			((src_h << 16) | ((src_w + 0x0001) & ~(uint32)0x0001)); /* SourceHeightWidth */
		/* setup source pitch (b0-15). Set 'format origin center' (b16-17) and
		 * select 'format interpolator foh (bilinear filtering)' (b24). */
		dma[si->engine.dma.current++] =
			(config->bytes_per_row | (1 << 16) | (1 << 24)); /* SourcePitch */

		/* setup source surface location: byte offset of the first source pixel
		 * relative to the start of the card's frame buffer */
		dma[si->engine.dma.current++] =
			(uint32)((uint8*)config->buffer - (uint8*)si->framebuffer +
			(list[i].src_top * config->bytes_per_row) + (list[i].src_left * bpp)); /* Offset */

		/* repeat the output for every destination clipping rectangle */
		while (clipcnt--)
		{
			LOG(4,("ACC_DMA: offscreen clip left %d, top %d\n",
				list[i].dest_cliplist[j].left, list[i].dest_cliplist[j].top));
			LOG(4,("ACC_DMA: offscreen clip width %d, height %d\n",
				list[i].dest_cliplist[j].width + 1, list[i].dest_cliplist[j].height + 1));

			/* now setup this clip (writing 5 32bit words): first 3 of them */
			nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCEORG, 2);
			/* setup dest clipping rect for blit (b0-15 = left, b16-31 = top) */
			dma[si->engine.dma.current++] =
				((uint32)list[i].dest_cliplist[j].top << 16) |
				(uint32)list[i].dest_cliplist[j].left; /* DestTopLeftClipRect */
			dma[si->engine.dma.current++] =
				(((uint32)list[i].dest_cliplist[j].height + 1) << 16) |
				((uint32)list[i].dest_cliplist[j].width + 1); /* DestHeightWidthClipRect */

			/* the remaining 2 words */
			nv_acc_cmd_dma(NV_SCALED_IMAGE_FROM_MEMORY, NV_SCALED_IMAGE_FROM_MEMORY_SOURCESIZE + 12, 1);
			/* setup source start: first (sub)pixel contributing to output picture */
			/* note:
			 * source clipping is not asked for.
			 * look at nVidia NV10+ bes engine code for usage example. */
			dma[si->engine.dma.current++] =
				0; /* SourceRef (b0-15 = hor, b16-31 = ver: both in 12.4 format) */

			j++;
		}

		i++;
	}

	/* tell the engine to fetch the commands in the DMA buffer that were not
	 * executed before. */
	nv_start_dma();

	/* reset surface depth settings so the other engine commands work as intended */
	if (si->dm.space == B_RGB15_LITTLE)
	{
		/* wait for room in fifo for surface setup cmd if needed */
		if (nv_acc_fifofree_dma(2) != B_OK) return;
		/* now setup 2D surface (writing 1 32bit word) */
		nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 1);
		dma[si->engine.dma.current++] = 0x00000004; /* Format */

		/* tell the engine to fetch the commands in the DMA buffer that were not
		 * executed before. */
		nv_start_dma();
	}

	/* tell 3D add-ons that they should reload their rendering states and surfaces */
	si->engine.threeD.reload = 0xffffffff;
}
2068
2069 /* rectangle fill - i.e. workspace and window background color */
/* Queue 'count' solid filled rectangles.
 * et:         engine token (not used by this routine);
 * colorIndex: the fill color, handed to the engine unmodified;
 * list:       the rectangles; right and bottom are inclusive;
 * count:      number of entries in list.
 * The routine returns early, without further drawing, as soon as no room can
 * be had in the DMA buffer. */
void FILL_RECTANGLE_DMA(engine_token *et, uint32 colorIndex, fill_rect_params *list, uint32 count)
{
	uint32 *dma = (uint32*)(si->dma_buffer);
	fill_rect_params *rect = list;
	uint16 batch;
	uint16 todo;

	/*** prepare the engine: GXcopy raster operation and the fill color ***/
	/* two commands with one argument each take four words */
	if (nv_acc_fifofree_dma(4) != B_OK) return;
	nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1);
	dma[si->engine.dma.current++] = 0xcc; /* SetRop5 */
	nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1);
	dma[si->engine.dma.current++] = colorIndex; /* Color1A */

	/*** issue the rectangles, at most 32 per batch ***/
	for (; count != 0; count -= batch)
	{
		/* batches keep the number of calls low, while long lists still get
		 * executed without trouble */
		batch = (count < 32) ? count : 32;

		/* one command word plus two words per rectangle */
		if (nv_acc_fifofree_dma(1 + (2 * batch)) != B_OK) return;

		/* a single fill command carries the whole batch
		 * (engine cmd supports 32 rects max) */
		nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, (2 * batch));
		for (todo = batch; todo != 0; todo--, rect++)
		{
			dma[si->engine.dma.current++] =
				(((rect->left) << 16) | ((rect->top) & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */
			dma[si->engine.dma.current++] =
				(((((rect->right)+1) - (rect->left)) << 16) |
				(((rect->bottom-rect->top)+1) & 0x0000ffff)); /* Unclipped Rect 0 WidthHeight */
		}

		/* have the engine fetch the commands of this batch */
		nv_start_dma();
	}

	/* tell 3D add-ons that they should reload their rendering states and surfaces */
	si->engine.threeD.reload = 0xffffffff;
}
2120
2121 /* span fill - i.e. (selected) menuitem background color (Dano) */
/* Queue 'count' solid filled horizontal spans, drawn as one pixel high rects.
 * et:         engine token (not used by this routine);
 * colorIndex: the fill color, handed to the engine unmodified;
 * list:       three entries per span, used below as: vertical position, left,
 *             right (inclusive);
 * count:      number of spans (so list holds 3 * count entries).
 * The routine returns early, without further drawing, as soon as no room can
 * be had in the DMA buffer. */
void FILL_SPAN_DMA(engine_token *et, uint32 colorIndex, uint16 *list, uint32 count)
{
	uint32 *dma = (uint32*)(si->dma_buffer);
	uint16 *span = list;
	uint16 batch;
	uint16 todo;

	/*** prepare the engine: GXcopy raster operation and the fill color ***/
	/* two commands with one argument each take four words */
	if (nv_acc_fifofree_dma(4) != B_OK) return;
	nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1);
	dma[si->engine.dma.current++] = 0xcc; /* SetRop5 */
	nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1);
	dma[si->engine.dma.current++] = colorIndex; /* Color1A */

	/*** issue the spans, at most 32 per batch ***/
	for (; count != 0; count -= batch)
	{
		/* batches keep the number of calls low, while long lists still get
		 * executed without trouble */
		batch = (count < 32) ? count : 32;

		/* one command word plus two words per span */
		if (nv_acc_fifofree_dma(1 + (2 * batch)) != B_OK) return;

		/* a single fill command carries the whole batch
		 * (engine cmd supports 32 rects max) */
		nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, (2 * batch));
		for (todo = batch; todo != 0; todo--, span += 3)
		{
			dma[si->engine.dma.current++] =
				(((span[1]) << 16) | ((span[0]) & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */
			/* the height is fixed at a single line */
			dma[si->engine.dma.current++] =
				((((span[2]+1) - (span[1])) << 16) | 0x00000001); /* Unclipped Rect 0 WidthHeight */
		}

		/* have the engine fetch the commands of this batch */
		nv_start_dma();
	}

	/* tell 3D add-ons that they should reload their rendering states and surfaces */
	si->engine.threeD.reload = 0xffffffff;
}
2171
2172 /* rectangle invert - i.e. text cursor and text selection */
/* Queue 'count' inverted rectangles.
 * et:    engine token (not used by this routine);
 * list:  the rectangles; right and bottom are inclusive;
 * count: number of entries in list.
 * The routine returns early, without further drawing, as soon as no room can
 * be had in the DMA buffer. */
void INVERT_RECTANGLE_DMA(engine_token *et, fill_rect_params *list, uint32 count)
{
	uint32 *dma = (uint32*)(si->dma_buffer);
	fill_rect_params *rect = list;
	uint16 batch;
	uint16 todo;

	/*** prepare the engine: GXinvert raster operation, fill color reset ***/
	/* two commands with one argument each take four words */
	if (nv_acc_fifofree_dma(4) != B_OK) return;
	nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1);
	dma[si->engine.dma.current++] = 0x55; /* SetRop5 */
	nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1);
	dma[si->engine.dma.current++] = 0x00000000; /* Color1A */

	/*** issue the rectangles, at most 32 per batch ***/
	for (; count != 0; count -= batch)
	{
		/* batches keep the number of calls low, while long lists still get
		 * executed without trouble */
		batch = (count < 32) ? count : 32;

		/* one command word plus two words per rectangle */
		if (nv_acc_fifofree_dma(1 + (2 * batch)) != B_OK) return;

		/* a single fill command carries the whole batch
		 * (engine cmd supports 32 rects max) */
		nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, (2 * batch));
		for (todo = batch; todo != 0; todo--, rect++)
		{
			dma[si->engine.dma.current++] =
				(((rect->left) << 16) | ((rect->top) & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */
			dma[si->engine.dma.current++] =
				(((((rect->right)+1) - (rect->left)) << 16) |
				(((rect->bottom-rect->top)+1) & 0x0000ffff)); /* Unclipped Rect 0 WidthHeight */
		}

		/* have the engine fetch the commands of this batch */
		nv_start_dma();
	}

	/* tell 3D add-ons that they should reload their rendering states and surfaces */
	si->engine.threeD.reload = 0xffffffff;
}
2223