xref: /haiku/src/add-ons/accelerants/nvidia/engine/nv_acc_dma.c (revision 95bac3fda53a4cb21880712d7b43f8c21db32a2e)
1 /* NV Acceleration functions */
2 
3 /* Author:
4    Rudolf Cornelissen 8/2003-1/2005.
5 
6    This code was possible thanks to:
7     - the Linux XFree86 NV driver,
8     - the Linux UtahGLX 3D driver.
9 */
10 
11 /*
12 	note:
13 	attempting DMA because without it I can't get NV40 and higher going ATM.
14 	Maybe later we can forget about the non-DMA version (that depends on
15 	3D acceleration attempts).
16 */
17 
18 #define MODULE_BIT 0x00080000
19 
20 #include "nv_std.h"
21 
22 /*acceleration notes*/
23 
24 /*functions Be's app_server uses:
25 fill span (horizontal only)
26 fill rectangle (these 2 are very similar)
27 invert rectangle
28 blit
29 */
30 
31 static void nv_start_dma(void);
32 static status_t nv_acc_fifofree_dma(uint16 cmd_size);
33 static void nv_acc_cmd_dma(uint32 cmd, uint16 offset, uint16 size);
34 static void nv_acc_set_ch_dma(uint16 ch, uint32 handle);
35 
36 /* used to track engine DMA stalls */
37 static uint8 err;
38 
39 /* wait until engine completely idle */
40 status_t nv_acc_wait_idle_dma()
41 {
42 	/* we'd better check for timeouts on the DMA engine as it's theoretically
43 	 * breakable by malfunctioning software */
44 	uint16 cnt = 0;
45 
46 	/* wait until all upcoming commands are in execution at least. Do this until
47 	 * we hit a timeout; abort if we failed at least three times before:
48 	 * if DMA stalls, we have to forget about it alltogether at some point, or
49 	 * the system will almost come to a complete halt.. */
50 	/* note:
51 	 * it doesn't matter which FIFO channel's DMA registers we access, they are in
52 	 * fact all the same set. It also doesn't matter if the channel was assigned a
53 	 * command or not. */
54 	while ((NV_REG32(NVACC_FIFO + NV_GENERAL_DMAGET) != (si->engine.dma.put << 2)) &&
55 			(cnt < 10000) && (err < 3))
56 	{
57 		/* snooze a bit so I do not hammer the bus */
58 		snooze (100);
59 		cnt++;
60 	}
61 
62 	/* log timeout if we had one */
63 	if (cnt == 10000)
64 	{
65 		if (err < 3) err++;
66 		LOG(4,("ACC_DMA: wait_idle; DMA timeout #%d, engine trouble!\n", err));
67 	}
68 
69 	/* wait until execution completed */
70 	while (ACCR(STATUS))
71 	{
72 		/* snooze a bit so I do not hammer the bus */
73 		snooze (100);
74 	}
75 
76 	return B_OK;
77 }
78 
79 /* AFAIK this must be done for every new screenmode.
80  * Engine required init. */
81 status_t nv_acc_init_dma()
82 {
83 	uint16 cnt;
84 	uint32 surf_depth, cmd_depth;
85 	/* reset the engine DMA stalls counter */
86 	err = 0;
87 
88 	/* a hanging engine only recovers from a complete power-down/power-up cycle */
89 	NV_REG32(NV32_PWRUPCTRL) = 0x13110011;
90 	snooze(1000);
91 	NV_REG32(NV32_PWRUPCTRL) = 0x13111111;
92 
93 	/* setup PTIMER: */
94 	//fixme? how about NV28 setup as just after coldstarting? (see nv_info.c)
95 	/* set timer numerator to 8 (in b0-15) */
96 	ACCW(PT_NUMERATOR, 0x00000008);
97 	/* set timer denominator to 3 (in b0-15) */
98 	ACCW(PT_DENOMINATR, 0x00000003);
99 
100 	/* disable timer-alarm INT requests (b0) */
101 	ACCW(PT_INTEN, 0x00000000);
102 	/* reset timer-alarm INT status bit (b0) */
103 	ACCW(PT_INTSTAT, 0xffffffff);
104 
105 	/* enable PRAMIN write access on pre NV10 before programming it! */
106 	if (si->ps.card_arch == NV04A)
107 	{
108 		/* set framebuffer config: type = notiling, PRAMIN write access enabled */
109 		NV_REG32(NV32_PFB_CONFIG_0) = 0x00001114;
110 	}
111 	else
112 	{
113 		/* setup acc engine 'source' tile adressranges */
114 		ACCW(NV10_FBTIL0AD, 0);
115 		ACCW(NV10_FBTIL1AD, 0);
116 		ACCW(NV10_FBTIL2AD, 0);
117 		ACCW(NV10_FBTIL3AD, 0);
118 		ACCW(NV10_FBTIL4AD, 0);
119 		ACCW(NV10_FBTIL5AD, 0);
120 		ACCW(NV10_FBTIL6AD, 0);
121 		ACCW(NV10_FBTIL7AD, 0);
122 		ACCW(NV10_FBTIL0ED, (si->ps.memory_size - 1));
123 		ACCW(NV10_FBTIL1ED, (si->ps.memory_size - 1));
124 		ACCW(NV10_FBTIL2ED, (si->ps.memory_size - 1));
125 		ACCW(NV10_FBTIL3ED, (si->ps.memory_size - 1));
126 		ACCW(NV10_FBTIL4ED, (si->ps.memory_size - 1));
127 		ACCW(NV10_FBTIL5ED, (si->ps.memory_size - 1));
128 		ACCW(NV10_FBTIL6ED, (si->ps.memory_size - 1));
129 		ACCW(NV10_FBTIL7ED, (si->ps.memory_size - 1));
130 	}
131 
132 	/*** PRAMIN ***/
133 	/* first clear the entire RAMHT (hash-table) space to a defined state. It turns
134 	 * out at least NV11 will keep the previously programmed handles over resets and
135 	 * power-outages upto about 15 seconds!! Faulty entries might well hang the
136 	 * engine (confirmed on NV11).
137 	 * Note:
138 	 * this behaviour is not very strange: even very old DRAM chips are known to be
139 	 * able to do this, even though you should refresh them every few milliseconds or
140 	 * so. (Large memory cell capacitors, though different cells vary a lot in their
141 	 * capacity.)
142 	 * Of course data validity is not certain by a long shot over this large
143 	 * amount of time.. */
144 	for(cnt = 0; cnt < 0x0400; cnt++)
145 		NV_REG32(NVACC_HT_HANDL_00 + (cnt << 2)) = 0;
146 	/* RAMHT (hash-table) space SETUP FIFO HANDLES */
147 	/* note:
148 	 * 'instance' tells you where the engine command is stored in 'PR_CTXx_x' sets
149 	 * below: instance being b4-19 with baseadress NV_PRAMIN_CTX_0 (0x00700000).
150 	 * That command is linked to the handle noted here. This handle is then used to
151 	 * tell the FIFO to which engine command it is connected!
152 	 * (CTX registers are actually a sort of RAM space.) */
153 	if (si->ps.card_arch >= NV40A)
154 	{
155 		/* (first set) */
156 		ACCW(HT_HANDL_00, (0x80000000 | NV10_CONTEXT_SURFACES_2D)); /* 32bit handle (not used) */
157 		ACCW(HT_VALUE_00, 0x0010114c); /* instance $114c, engine = acc engine, CHID = $00 */
158 
159 		ACCW(HT_HANDL_01, (0x80000000 | NV_IMAGE_BLIT)); /* 32bit handle */
160 		ACCW(HT_VALUE_01, 0x00101148); /* instance $1146, engine = acc engine, CHID = $00 */
161 
162 		ACCW(HT_HANDL_02, (0x80000000 | NV4_GDI_RECTANGLE_TEXT)); /* 32bit handle */
163 		ACCW(HT_VALUE_02, 0x0010114a); /* instance $1147, engine = acc engine, CHID = $00 */
164 
165 		/* (second set) */
166 		ACCW(HT_HANDL_10, (0x80000000 | NV_ROP5_SOLID)); /* 32bit handle */
167 		ACCW(HT_VALUE_10, 0x00101142); /* instance $1142, engine = acc engine, CHID = $00 */
168 
169 		ACCW(HT_HANDL_11, (0x80000000 | NV_IMAGE_BLACK_RECTANGLE)); /* 32bit handle */
170 		ACCW(HT_VALUE_11, 0x00101144); /* instance $1143, engine = acc engine, CHID = $00 */
171 
172 		ACCW(HT_HANDL_12, (0x80000000 | NV_IMAGE_PATTERN)); /* 32bit handle */
173 		ACCW(HT_VALUE_12, 0x00101146); /* instance $1144, engine = acc engine, CHID = $00 */
174 	}
175 	else
176 	{
177 		/* (first set) */
178 		ACCW(HT_HANDL_00, (0x80000000 | NV4_SURFACE)); /* 32bit handle */
179 		ACCW(HT_VALUE_00, 0x8001114c); /* instance $114c, engine = acc engine, CHID = $00 */
180 
181 		ACCW(HT_HANDL_01, (0x80000000 | NV_IMAGE_BLIT)); /* 32bit handle */
182 		ACCW(HT_VALUE_01, 0x80011148); /* instance $1146, engine = acc engine, CHID = $00 */
183 
184 		ACCW(HT_HANDL_02, (0x80000000 | NV4_GDI_RECTANGLE_TEXT)); /* 32bit handle */
185 		ACCW(HT_VALUE_02, 0x8001114a); /* instance $1147, engine = acc engine, CHID = $00 */
186 
187 		/* (second set) */
188 		ACCW(HT_HANDL_10, (0x80000000 | NV_ROP5_SOLID)); /* 32bit handle */
189 		ACCW(HT_VALUE_10, 0x80011142); /* instance $1142, engine = acc engine, CHID = $00 */
190 
191 		ACCW(HT_HANDL_11, (0x80000000 | NV_IMAGE_BLACK_RECTANGLE)); /* 32bit handle */
192 		ACCW(HT_VALUE_11, 0x80011144); /* instance $1143, engine = acc engine, CHID = $00 */
193 
194 		ACCW(HT_HANDL_12, (0x80000000 | NV_IMAGE_PATTERN)); /* 32bit handle */
195 		ACCW(HT_VALUE_12, 0x80011146); /* instance $1144, engine = acc engine, CHID = $00 */
196 	}
197 
198 	/* program CTX registers: CTX1 is mostly done later (colorspace dependant) */
199 	/* note:
200 	 * CTX determines which HT handles point to what engine commands. */
201 	/* note also:
202 	 * CTX registers are in fact in the same GPU internal RAM space as the engine's
203 	 * hashtable. This means that stuff programmed in here also survives resets and
204 	 * power-outages! (confirmed NV11) */
205 	if (si->ps.card_arch >= NV40A)
206 	{
207 		/* setup a DMA define for use by command defines below. */
208 		ACCW(PR_CTX0_R, 0x00003000); /* DMA page table present and of linear type;
209 									  * DMA target node is NVM (non-volatile memory?)
210 									  * (instead of doing PCI or AGP transfers) */
211 		ACCW(PR_CTX1_R, (si->ps.memory_size - 1)); /* DMA limit: size is all cardRAM */
212 		ACCW(PR_CTX2_R, ((0x00000000 & 0xfffff000) | 0x00000002));
213 									 /* DMA access type is READ_AND_WRITE;
214 									  * memory starts at start of cardRAM (b12-31):
215 									  * It's adress needs to be at a 4kb boundary! */
216 		ACCW(PR_CTX3_R, 0x00000002); /* unknown (looks like this is rubbish/not needed?) */
217 		/* setup set '0' for cmd NV_ROP5_SOLID */
218 		ACCW(PR_CTX0_0, 0x02080043); /* NVclass $043, patchcfg ROP_AND, nv10+: little endian */
219 		ACCW(PR_CTX1_0, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */
220 		ACCW(PR_CTX2_0, 0x00000000); /* DMA0 and DMA1 instance invalid */
221 		ACCW(PR_CTX3_0, 0x00000000); /* method traps disabled */
222 		ACCW(PR_CTX0_1, 0x00000000); /* extra */
223 		ACCW(PR_CTX1_1, 0x00000000); /* extra */
224 		/* setup set '1' for cmd NV_IMAGE_BLACK_RECTANGLE */
225 		ACCW(PR_CTX0_2, 0x02080019); /* NVclass $019, patchcfg ROP_AND, nv10+: little endian */
226 		ACCW(PR_CTX1_2, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */
227 		ACCW(PR_CTX2_2, 0x00000000); /* DMA0 and DMA1 instance invalid */
228 		ACCW(PR_CTX3_2, 0x00000000); /* method traps disabled */
229 		ACCW(PR_CTX0_3, 0x00000000); /* extra */
230 		ACCW(PR_CTX1_3, 0x00000000); /* extra */
231 		/* setup set '2' for cmd NV_IMAGE_PATTERN */
232 		ACCW(PR_CTX0_4, 0x02080018); /* NVclass $018, patchcfg ROP_AND, nv10+: little endian */
233 		ACCW(PR_CTX1_4, 0x02000000); /* colorspace not set, notify instance is $0200 (b16-31) */
234 		ACCW(PR_CTX2_4, 0x00000000); /* DMA0 and DMA1 instance invalid */
235 		ACCW(PR_CTX3_4, 0x00000000); /* method traps disabled */
236 		ACCW(PR_CTX0_5, 0x00000000); /* extra */
237 		ACCW(PR_CTX1_5, 0x00000000); /* extra */
238 		/* setup set '4' for cmd NV_IMAGE_BLIT */
239 		ACCW(PR_CTX0_6, 0x0208005f); /* NVclass $05f, patchcfg ROP_AND, nv10+: little endian */
240 		ACCW(PR_CTX1_6, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */
241 		ACCW(PR_CTX2_6, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */
242 		ACCW(PR_CTX3_6, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */
243 		ACCW(PR_CTX0_7, 0x00000000); /* extra */
244 		ACCW(PR_CTX1_7, 0x00000000); /* extra */
245 		/* setup set '5' for cmd NV4_GDI_RECTANGLE_TEXT */
246 		ACCW(PR_CTX0_8, 0x0208004a); /* NVclass $04a, patchcfg ROP_AND, nv10+: little endian */
247 		ACCW(PR_CTX1_8, 0x02000000); /* colorspace not set, notify instance is $0200 (b16-31) */
248 		ACCW(PR_CTX2_8, 0x00000000); /* DMA0 and DMA1 instance invalid */
249 		ACCW(PR_CTX3_8, 0x00000000); /* method traps disabled */
250 		ACCW(PR_CTX0_9, 0x00000000); /* extra */
251 		ACCW(PR_CTX1_9, 0x00000000); /* extra */
252 		/* setup set '6' for cmd NV10_CONTEXT_SURFACES_2D */
253 		ACCW(PR_CTX0_A, 0x02080062); /* NVclass $062, nv10+: little endian */
254 		ACCW(PR_CTX1_A, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */
255 		ACCW(PR_CTX2_A, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */
256 		ACCW(PR_CTX3_A, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */
257 		ACCW(PR_CTX0_B, 0x00000000); /* extra */
258 		ACCW(PR_CTX1_B, 0x00000000); /* extra */
259 		/* setup DMA set pointed at by PF_CACH1_DMAI */
260 		ACCW(PR_CTX0_C, 0x00003002); /* DMA page table present and of linear type;
261 									  * DMA class is $002 (b0-11);
262 									  * DMA target node is NVM (non-volatile memory?)
263 									  * (instead of doing PCI or AGP transfers) */
264 		ACCW(PR_CTX1_C, 0x00007fff); /* DMA limit: tablesize is 32k bytes */
265 		ACCW(PR_CTX2_C, (((si->ps.memory_size - 1) & 0xffff8000) | 0x00000002));
266 									 /* DMA access type is READ_AND_WRITE;
267 									  * table is located at end of cardRAM (b12-31):
268 									  * It's adress needs to be at a 4kb boundary! */
269 	}
270 	else
271 	{
272 		/* setup a DMA define for use by command defines below. */
273 		ACCW(PR_CTX0_R, 0x00003000); /* DMA page table present and of linear type;
274 									  * DMA target node is NVM (non-volatile memory?)
275 									  * (instead of doing PCI or AGP transfers) */
276 		ACCW(PR_CTX1_R, (si->ps.memory_size - 1)); /* DMA limit: size is all cardRAM */
277 		ACCW(PR_CTX2_R, ((0x00000000 & 0xfffff000) | 0x00000002));
278 									 /* DMA access type is READ_AND_WRITE;
279 									  * memory starts at start of cardRAM (b12-31):
280 									  * It's adress needs to be at a 4kb boundary! */
281 		ACCW(PR_CTX3_R, 0x00000002); /* unknown (looks like this is rubbish/not needed?) */
282 		/* setup set '0' for cmd NV_ROP5_SOLID */
283 		ACCW(PR_CTX0_0, 0x01008043); /* NVclass $043, patchcfg ROP_AND, nv10+: little endian */
284 		ACCW(PR_CTX1_0, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */
285 		ACCW(PR_CTX2_0, 0x00000000); /* DMA0 and DMA1 instance invalid */
286 		ACCW(PR_CTX3_0, 0x00000000); /* method traps disabled */
287 		/* setup set '1' for cmd NV_IMAGE_BLACK_RECTANGLE */
288 		ACCW(PR_CTX0_2, 0x01008019); /* NVclass $019, patchcfg ROP_AND, nv10+: little endian */
289 		ACCW(PR_CTX1_2, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */
290 		ACCW(PR_CTX2_2, 0x00000000); /* DMA0 and DMA1 instance invalid */
291 		ACCW(PR_CTX3_2, 0x00000000); /* method traps disabled */
292 		/* setup set '2' for cmd NV_IMAGE_PATTERN */
293 		ACCW(PR_CTX0_4, 0x01008018); /* NVclass $018, patchcfg ROP_AND, nv10+: little endian */
294 		ACCW(PR_CTX1_4, 0x00000002); /* colorspace not set, notify instance is $0200 (b16-31) */
295 		ACCW(PR_CTX2_4, 0x00000000); /* DMA0 and DMA1 instance invalid */
296 		ACCW(PR_CTX3_4, 0x00000000); /* method traps disabled */
297 		/* setup set '4' for cmd NV_IMAGE_BLIT */
298 		ACCW(PR_CTX0_6, 0x0100805f); /* NVclass $05f, patchcfg ROP_AND, nv10+: little endian */
299 		ACCW(PR_CTX1_6, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */
300 		ACCW(PR_CTX2_6, 0x11401140); /* DMA0 instance is $1140, DMA1 instance invalid */
301 		ACCW(PR_CTX3_6, 0x00000000); /* method trap 0 is $1140, trap 1 disabled */
302 		/* setup set '5' for cmd NV4_GDI_RECTANGLE_TEXT */
303 		ACCW(PR_CTX0_8, 0x0100804a); /* NVclass $04a, patchcfg ROP_AND, nv10+: little endian */
304 		ACCW(PR_CTX1_8, 0x00000002); /* colorspace not set, notify instance is $0200 (b16-31) */
305 		ACCW(PR_CTX2_8, 0x00000000); /* DMA0 and DMA1 instance invalid */
306 		ACCW(PR_CTX3_8, 0x00000000); /* method traps disabled */
307 		/* setup set '6' for ... */
308 		if(si->ps.card_arch >= NV10A)
309 		{
310 			/* ... cmd NV10_CONTEXT_SURFACES_2D */
311 			ACCW(PR_CTX0_A, 0x01008062); /* NVclass $062, nv10+: little endian */
312 		}
313 		else
314 		{
315 			/* ... cmd NV4_SURFACE */
316 			ACCW(PR_CTX0_A, 0x01008042); /* NVclass $042, nv10+: little endian */
317 		}
318 		ACCW(PR_CTX1_A, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */
319 		ACCW(PR_CTX2_A, 0x11401140); /* DMA0 instance is $1140, DMA1 instance invalid */
320 		ACCW(PR_CTX3_A, 0x00000000); /* method trap 0 is $1140, trap 1 disabled */
321 		/* setup DMA set pointed at by PF_CACH1_DMAI */
322 		ACCW(PR_CTX0_C, 0x00003002); /* DMA page table present and of linear type;
323 									  * DMA class is $002 (b0-11);
324 									  * DMA target node is NVM (non-volatile memory?)
325 									  * (instead of doing PCI or AGP transfers) */
326 		ACCW(PR_CTX1_C, 0x00007fff); /* DMA limit: tablesize is 32k bytes */
327 		ACCW(PR_CTX2_C, (((si->ps.memory_size - 1) & 0xffff8000) | 0x00000002));
328 									 /* DMA access type is READ_AND_WRITE;
329 									  * table is located at end of cardRAM (b12-31):
330 									  * It's adress needs to be at a 4kb boundary! */
331 	}
332 
333 	if (si->ps.card_arch == NV04A)
334 	{
335 /*
336        if((pNv->Chipset & 0x0fff) == 0x0020)
337        {
338            pNv->PRAMIN[0x0824] |= 0x00020000;
339            pNv->PRAMIN[0x0826] += pNv->FbAddress;
340        }
341        pNv->PGRAPH[0x0080/4] = 0x000001FF;//acc DEBUG0
342        pNv->PGRAPH[0x0080/4] = 0x1230C000;
343        pNv->PGRAPH[0x0084/4] = 0x72111101;
344        pNv->PGRAPH[0x0088/4] = 0x11D5F071;
345        pNv->PGRAPH[0x008C/4] = 0x0004FF31;
346        pNv->PGRAPH[0x008C/4] = 0x4004FF31;
347 
348        pNv->PGRAPH[0x0140/4] = 0x00000000;
349        pNv->PGRAPH[0x0100/4] = 0xFFFFFFFF;
350        pNv->PGRAPH[0x0170/4] = 0x10010100;
351        pNv->PGRAPH[0x0710/4] = 0xFFFFFFFF;
352        pNv->PGRAPH[0x0720/4] = 0x00000001;
353 
354        pNv->PGRAPH[0x0810/4] = 0x00000000;
355        pNv->PGRAPH[0x0608/4] = 0xFFFFFFFF;
356 */
357 	}
358 	else
359 	{
360 		/* do a explicit engine reset */
361 		ACCW(DEBUG0, 0xffffffff);
362 		ACCW(DEBUG0, 0x00000000);
363 		/* disable all acceleration engine INT reguests */
364 		ACCW(ACC_INTE, 0x00000000);
365 		/* reset all acceration engine INT status bits */
366 		ACCW(ACC_INTS, 0xffffffff);
367 		/* context control enabled */
368 		ACCW(NV10_CTX_CTRL, 0x10010100);
369 		/* all acceleration buffers, pitches and colors are valid */
370 		ACCW(NV10_ACC_STAT, 0xffffffff);
371 		/* enable acceleration engine command FIFO */
372 		ACCW(FIFO_EN, 0x00000001);
373 		/* setup surface type */
374 		ACCW(NV10_SURF_TYP, ((ACCR(NV10_SURF_TYP)) & 0x0007ff00));
375 		ACCW(NV10_SURF_TYP, ((ACCR(NV10_SURF_TYP)) | 0x00020100));
376 	}
377 
378 	if (si->ps.card_arch == NV10A)
379 	{
380 		/* init some function blocks */
381 		ACCW(DEBUG1, 0x00118700);
382 		ACCW(DEBUG2, 0x24e00810);
383 		ACCW(DEBUG3, 0x55de0030);
384 
385 		/* copy tile setup stuff from 'source' to acc engine */
386 		for (cnt = 0; cnt < 32; cnt++)
387 		{
388 			NV_REG32(NVACC_NV10_TIL0AD + (cnt << 2)) =
389 				NV_REG32(NVACC_NV10_FBTIL0AD + (cnt << 2));
390 		}
391 
392 		/* setup location of active screen in framebuffer */
393 		ACCW(OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
394 		ACCW(OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
395 		/* setup accesible card memory range */
396 		ACCW(BLIMIT0, (si->ps.memory_size - 1));
397 		ACCW(BLIMIT1, (si->ps.memory_size - 1));
398 
399 		/* pattern shape value = 8x8, 2 color */
400 		//fixme: setting this here means that we don't need to provide the acc
401 		//commands with it. But have other architectures this pre-programmed
402 		//explicitly??? I don't think so!
403 		ACCW(PAT_SHP, 0x00000000);
404 		/* Pgraph Beta AND value (fraction) b23-30 */
405 		ACCW(BETA_AND_VAL, 0xffffffff);
406 	}
407 
408 	if (si->ps.card_arch >= NV20A)
409 	{
410 		switch (si->ps.card_arch)
411 		{
412 		case NV40A:
413 			/* init some function blocks */
414 			ACCW(DEBUG1, 0x401287c0);
415 			ACCW(DEBUG3, 0x60de8051);
416 			/* disable specific functions, but enable SETUP_SPARE2 register */
417 			ACCW(NV10_DEBUG4, 0x00008000);
418 			/* set limit_viol_pix_adress(?): more likely something unknown.. */
419 			ACCW(NV25_WHAT0, 0x00be3c5f);
420 
421 			/* unknown.. */
422 			switch (si->ps.card_type)
423 			{
424 			case NV40:
425 				ACCW(NV40_WHAT0, 0x83280fff);
426 				ACCW(NV40_WHAT1, 0x000000a0);
427 				ACCW(NV40_WHAT2, 0x0078e366);
428 				ACCW(NV40_WHAT3, 0x0000014c);
429 //      	    pNv->PFB[0x033C/4] &= 0xffff7fff;//0x00100000 :<<<< NV_PFB_CLOSE_PAGE2, bits unknown
430 				break;
431 			case NV41:
432 				ACCW(NV40P_WHAT0, 0x83280eff);
433 				ACCW(NV40P_WHAT1, 0x000000a0);
434 				ACCW(NV40P_WHAT2, 0x007596ff);
435 				ACCW(NV40P_WHAT3, 0x00000108);
436 				break;
437 			case NV43:
438 				ACCW(NV40P_WHAT0, 0x83280eff);
439 				ACCW(NV40P_WHAT1, 0x000000a0);
440 				ACCW(NV40P_WHAT2, 0x0072cb77);
441 				ACCW(NV40P_WHAT3, 0x00000108);
442 				break;
443 			case NV45: //fixme, checkout: this is cardID 0x016x at least!
444 				ACCW(NV40P_WHAT0, 0x83280eff);
445 				ACCW(NV40P_WHAT1, 0x000000a0);
446 
447 				NV_REG32(NV32_NV45_WHAT10) = NV_REG32(NV32_NV10STRAPINFO);
448 				NV_REG32(NV32_NV45_WHAT11) = 0x00000000;
449 				NV_REG32(NV32_NV45_WHAT12) = 0x00000000;
450 				NV_REG32(NV32_NV45_WHAT13) = NV_REG32(NV32_NV10STRAPINFO);
451 
452 				ACCW(NV45_WHAT2, 0x00000000);
453 				ACCW(NV45_WHAT3, 0x00000000);
454 //schakelt screrm signaal uit op NV43, maar timing blijft werken<<<<<<<<
455 //      	    pNv->PRAMDAC[0x0608/4] |= 0x00100000;//0x00680608==NVDAC_TSTCTRL haiku
456               									//b20=1=DACTM_TEST ON (termination?)
457               									//how about: NVDAC2_TSTCTRL????
458 				break;
459 			default:
460 				ACCW(NV40P_WHAT0, 0x83280eff);
461 				ACCW(NV40P_WHAT1, 0x000000a0);
462 				break;
463 			}
464 
465 			ACCW(NV10_TIL3PT, 0x2ffff800);
466 			ACCW(NV10_TIL3ST, 0x00006000);
467 			ACCW(NV4X_WHAT1, 0x01000000);
468 			/* engine data source DMA instance = $1140 */
469 			ACCW(NV4X_DMA_SRC, 0x00001140);
470 			break;
471 		case NV30A:
472 			/* init some function blocks, but most is unknown.. */
473 			ACCW(DEBUG1, 0x40108700);
474 			ACCW(NV25_WHAT1, 0x00140000);
475 			ACCW(DEBUG3, 0xf00e0431);
476 			ACCW(NV10_DEBUG4, 0x00008000);
477 			ACCW(NV25_WHAT0, 0xf04b1f36);
478 			ACCW(NV20_WHAT3, 0x1002d888);
479 			ACCW(NV25_WHAT2, 0x62ff007f);
480 			break;
481 		case NV20A:
482 			/* init some function blocks, but most is unknown.. */
483 			ACCW(DEBUG1, 0x00118700);
484 			ACCW(DEBUG3, 0xf20e0431);
485 			ACCW(NV10_DEBUG4, 0x00000000);
486 			ACCW(NV20_WHAT1, 0x00000040);
487 			if (si->ps.card_type < NV25)
488 			{
489 				ACCW(NV20_WHAT2, 0x00080000);
490 				ACCW(NV10_DEBUG5, 0x00000005);
491 				ACCW(NV20_WHAT3, 0x45caa208);
492 				ACCW(NV20_WHAT4, 0x24000000);
493 				ACCW(NV20_WHAT5, 0x00000040);
494 
495 				/* copy some fixed RAM(?) configuration info(?) to some indexed registers: */
496 				/* b16-24 is select; b2-13 is adress in 32-bit words */
497 				ACCW(RDI_INDEX, 0x00e00038);
498 				/* data is 32-bit */
499 				ACCW(RDI_DATA, 0x00000030);
500 				/* copy some fixed RAM(?) configuration info(?) to some indexed registers: */
501 				/* b16-24 is select; b2-13 is adress in 32-bit words */
502 				ACCW(RDI_INDEX, 0x00e10038);
503 				/* data is 32-bit */
504 				ACCW(RDI_DATA, 0x00000030);
505 			}
506 			else
507 			{
508 				ACCW(NV25_WHAT1, 0x00080000);
509 				ACCW(NV25_WHAT0, 0x304b1fb6);
510 				ACCW(NV20_WHAT3, 0x18b82880);
511 				ACCW(NV20_WHAT4, 0x44000000);
512 				ACCW(NV20_WHAT5, 0x40000080);
513 				ACCW(NV25_WHAT2, 0x000000ff);
514 			}
515 			break;
516 		}
517 
518 		/* NV20A, NV30A and NV40A: */
519 		/* copy tile setup stuff from 'source' to acc engine (pattern colorRAM?) */
520 		for (cnt = 0; cnt < 32; cnt++)
521 		{
522 			NV_REG32(NVACC_NV20_WHAT0 + (cnt << 2)) =
523 				NV_REG32(NVACC_NV10_FBTIL0AD + (cnt << 2));
524 		}
525 
526 		if (si->ps.card_arch >= NV40A)
527 		{
528 			if (si->ps.card_type == NV40)
529 			{
530 				/* copy some RAM configuration info(?) */
531  				ACCW(NV20_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0));
532 				ACCW(NV20_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1));
533 				ACCW(NV40_WHAT_T2, NV_REG32(NV32_PFB_CONFIG_0));
534 				ACCW(NV40_WHAT_T3, NV_REG32(NV32_PFB_CONFIG_1));
535 
536 				/* setup location of active screen in framebuffer */
537 				ACCW(NV20_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
538 				ACCW(NV20_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
539 				/* setup accesible card memory range */
540 				ACCW(NV20_BLIMIT6, (si->ps.memory_size - 1));
541 				ACCW(NV20_BLIMIT7, (si->ps.memory_size - 1));
542 			}
543 			else
544 			{
545 				/* copy some RAM configuration info(?) */
546 				ACCW(NV40P_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0));
547 				ACCW(NV40P_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1));
548 				ACCW(NV40P_WHAT_T2, NV_REG32(NV32_PFB_CONFIG_0));
549 				ACCW(NV40P_WHAT_T3, NV_REG32(NV32_PFB_CONFIG_1));
550 
551 				/* setup location of active screen in framebuffer */
552 				ACCW(NV40P_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
553 				ACCW(NV40P_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
554 				/* setup accesible card memory range */
555 				ACCW(NV40P_BLIMIT6, (si->ps.memory_size - 1));
556 				ACCW(NV40P_BLIMIT7, (si->ps.memory_size - 1));
557 			}
558 		}
559 		else /* NV20A and NV30A: */
560 		{
561 			/* copy some RAM configuration info(?) */
562 			ACCW(NV20_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0));
563 			ACCW(NV20_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1));
564 			/* copy some RAM configuration info(?) to some indexed registers: */
565 			/* b16-24 is select; b2-13 is adress in 32-bit words */
566 			ACCW(RDI_INDEX, 0x00ea0000);
567 			/* data is 32-bit */
568 			ACCW(RDI_DATA, NV_REG32(NV32_PFB_CONFIG_0));
569 			/* b16-24 is select; b2-13 is adress in 32-bit words */
570 			ACCW(RDI_INDEX, 0x00ea0004);
571 			/* data is 32-bit */
572 			ACCW(RDI_DATA, NV_REG32(NV32_PFB_CONFIG_1));
573 
574 			/* setup location of active screen in framebuffer */
575 			ACCW(NV20_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
576 			ACCW(NV20_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
577 			/* setup accesible card memory range */
578 			ACCW(NV20_BLIMIT6, (si->ps.memory_size - 1));
579 			ACCW(NV20_BLIMIT7, (si->ps.memory_size - 1));
580 		}
581 
582 		/* NV20A, NV30A and NV40A: */
583 		/* setup some acc engine tile stuff */
584 		ACCW(NV10_TIL2AD, 0x00000000);
585 		ACCW(NV10_TIL0ED, 0xffffffff);
586 	}
587 
588 	/* all cards: */
589 	/* setup clipping: rect size is 32768 x 32768, probably max. setting */
590 	/* note:
591 	 * can also be done via the NV_IMAGE_BLACK_RECTANGLE engine command. */
592 	ACCW(ABS_UCLP_XMIN, 0x00000000);
593 	ACCW(ABS_UCLP_YMIN, 0x00000000);
594 	ACCW(ABS_UCLP_XMAX, 0x00007fff);
595 	ACCW(ABS_UCLP_YMAX, 0x00007fff);
596 
597 	/*** PFIFO ***/
598 	/* (setup caches) */
599 	/* disable caches reassign */
600 	ACCW(PF_CACHES, 0x00000000);
601 	/* PFIFO mode: channel 0 is in DMA mode, channels 1 - 32 are in PIO mode */
602 	ACCW(PF_MODE, 0x00000001);
603 	/* cache1 push0 access disabled */
604 	ACCW(PF_CACH1_PSH0, 0x00000000);
605 	/* cache1 pull0 access disabled */
606 	ACCW(PF_CACH1_PUL0, 0x00000000);
607 	/* cache1 push1 mode = DMA */
608 	if (si->ps.card_arch >= NV40A)
609 		ACCW(PF_CACH1_PSH1, 0x00010000);
610 	else
611 		ACCW(PF_CACH1_PSH1, 0x00000100);
612 	/* cache1 DMA Put offset = 0 (b2-28) */
613 	ACCW(PF_CACH1_DMAP, 0x00000000);
614 	/* cache1 DMA Get offset = 0 (b2-28) */
615 	ACCW(PF_CACH1_DMAG, 0x00000000);
616 	/* cache1 DMA instance adress = $114e (b0-15);
617 	 * instance being b4-19 with baseadress NV_PRAMIN_CTX_0 (0x00700000). */
618 	/* note:
619 	 * should point to a DMA definition in CTX register space (which is sort of RAM).
620 	 * This define tells the engine where the DMA cmd buffer is and what it's size is.
621 	 * Inside that cmd buffer you'll find the actual issued engine commands. */
622 	ACCW(PF_CACH1_DMAI, 0x0000114e);
623 	/* cache0 push0 access disabled */
624 	ACCW(PF_CACH0_PSH0, 0x00000000);
625 	/* cache0 pull0 access disabled */
626 	ACCW(PF_CACH0_PUL0, 0x00000000);
627 	/* RAM HT (hash table) baseadress = $10000 (b4-8), size = 4k,
628 	 * search = 128 (is byte offset between hash 'sets') */
629 	/* note:
630 	 * so HT base is $00710000, last is $00710fff.
631 	 * In this space you define the engine command handles (HT_HANDL_XX), which
632 	 * in turn points to the defines in CTX register space (which is sort of RAM) */
633 	ACCW(PF_RAMHT, 0x03000100);
634 	/* RAM FC baseadress = $11000 (b3-8) (size is fixed to 0.5k(?)) */
635 	/* note:
636 	 * so FC base is $00711000, last is $007111ff. (not used?) */
637 	ACCW(PF_RAMFC, 0x00000110);
638 	/* RAM RO baseadress = $11200 (b1-8), size = 0.5k */
639 	/* note:
640 	 * so RO base is $00711200, last is $007113ff. (not used?) */
641 	/* note also:
642 	 * This means(?) the PRAMIN CTX registers are accessible from base $00711400. */
643 	ACCW(PF_RAMRO, 0x00000112);
644 	/* PFIFO size: ch0-15 = 512 bytes, ch16-31 = 124 bytes */
645 	ACCW(PF_SIZE, 0x0000ffff);
646 	/* cache1 hash instance = $ffff (b0-15) */
647 	ACCW(PF_CACH1_HASH, 0x0000ffff);
648 	/* disable all PFIFO INTs */
649 	ACCW(PF_INTEN, 0x00000000);
650 	/* reset all PFIFO INT status bits */
651 	ACCW(PF_INTSTAT, 0xffffffff);
652 	/* cache0 pull0 engine = acceleration engine (graphics) */
653 	ACCW(PF_CACH0_PUL1, 0x00000001);
654 	/* cache1 DMA control: disable some stuff */
655 	ACCW(PF_CACH1_DMAC, 0x00000000);
656 	/* cache1 engine 0 upto/including 7 is software (could also be graphics or DVD) */
657 	ACCW(PF_CACH1_ENG, 0x00000000);
658 	/* cache1 DMA fetch: trigger at 128 bytes, size is 32 bytes, max requests is 15,
659 	 * use little endian */
660 	ACCW(PF_CACH1_DMAF, 0x000f0078);
661 	/* cache1 DMA push: b0 = 1: access is enabled */
662 	ACCW(PF_CACH1_DMAS, 0x00000001);
663 	/* cache1 push0 access enabled */
664 	ACCW(PF_CACH1_PSH0, 0x00000001);
665 	/* cache1 pull0 access enabled */
666 	ACCW(PF_CACH1_PUL0, 0x00000001);
667 	/* cache1 pull1 engine = acceleration engine (graphics) */
668 	ACCW(PF_CACH1_PUL1, 0x00000001);
669 	/* enable PFIFO caches reassign */
670 	ACCW(PF_CACHES, 0x00000001);
671 
672 	/*** init acceleration engine command info ***/
673 	/* set object handles */
674 	/* note:
675 	 * probably depending on some other setup, there are 8 or 32 FIFO channels
676 	 * available. Assuming the current setup only has 8 channels because the 'rest'
677 	 * isn't setup here... */
678 	si->engine.fifo.handle[0] = NV_ROP5_SOLID;
679 	si->engine.fifo.handle[1] = NV_IMAGE_BLACK_RECTANGLE;
680 	si->engine.fifo.handle[2] = NV_IMAGE_PATTERN;
681 	si->engine.fifo.handle[3] = NV4_SURFACE; /* NV10_CONTEXT_SURFACES_2D is identical */
682 	si->engine.fifo.handle[4] = NV_IMAGE_BLIT;
683 	si->engine.fifo.handle[5] = NV4_GDI_RECTANGLE_TEXT;
684 	si->engine.fifo.handle[6] = NV1_RENDER_SOLID_LIN;
685 	si->engine.fifo.handle[7] = NV4_DX5_TEXTURE_TRIANGLE;
686 	/* preset no FIFO channels assigned to cmd's */
687 	for (cnt = 0; cnt < 0x20; cnt++)
688 	{
689 		si->engine.fifo.ch_ptr[cnt] = 0;
690 	}
691 	/* set handle's pointers to their assigned FIFO channels */
692 	/* note:
693 	 * b0-1 aren't used as adressbits. Using b0 to indicate a valid pointer. */
694 	for (cnt = 0; cnt < 0x08; cnt++)
695 	{
696 		si->engine.fifo.ch_ptr[(si->engine.fifo.handle[cnt])] =
697 												(0x00000001 + (cnt * 0x00002000));
698 	}
699 
700 	/*** init DMA command buffer info ***/
701 	si->engine.dma.cmdbuffer = (uint32 *)((char *)si->framebuffer +
702 		((si->ps.memory_size - 1) & 0xffff8000));
703 	LOG(4,("ACC_DMA: command buffer is at adress $%08x\n",
704 		((uint32)(si->engine.dma.cmdbuffer))));
705 	/* we have issued no DMA cmd's to the engine yet */
706 	si->engine.dma.put = 0;
707 	/* the current first free adress in the DMA buffer is at offset 0 */
708 	si->engine.dma.current = 0;
709 	/* the DMA buffer can hold 8k 32-bit words (it's 32kb in size) */
710 	/* note:
711 	 * one word is reserved at the end of the DMA buffer to be able to instruct the
712 	 * engine to do a buffer wrap-around!
713 	 * (DMA opcode 'noninc method': issue word $20000000.) */
714 	si->engine.dma.max = 8192 - 1;
715 	/* note the current free space we have left in the DMA buffer */
716 	si->engine.dma.free = si->engine.dma.max - si->engine.dma.current;
717 
718 	/*** init FIFO via DMA command buffer. ***/
719 	/* wait for room in fifo for new FIFO assigment cmds if needed: */
720 //fixme if CH6 and CH7 are assigned..
721 //	if (nv_acc_fifofree_dma(16) != B_OK) return B_ERROR;
722 	if (nv_acc_fifofree_dma(12) != B_OK) return B_ERROR;
723 
724 	/* program new FIFO assignments */
725 	/* Raster OPeration: */
726 	nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH0, si->engine.fifo.handle[0]);
727 	/* Clip: */
728 	nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH1, si->engine.fifo.handle[1]);
729 	/* Pattern: */
730 	nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH2, si->engine.fifo.handle[2]);
731 	/* 2D Surface: */
732 	nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH3, si->engine.fifo.handle[3]);
733 	/* Blit: */
734 	nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH4, si->engine.fifo.handle[4]);
735 	/* Bitmap: */
736 	nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH5, si->engine.fifo.handle[5]);
737 	/* Line: (not used or 3D only?) */
738 //fixme..
739 //	nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH6, si->engine.fifo.handle[6]);
740 	/* Textured Triangle: (3D only) */
741 //fixme..
742 //	nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH7, si->engine.fifo.handle[7]);
743 
744 	/*** Set pixel width ***/
745 	switch(si->dm.space)
746 	{
747 	case B_CMAP8:
748 		surf_depth = 0x00000001;
749 		cmd_depth = 0x00000003;
750 		break;
751 	case B_RGB15_LITTLE:
752 	case B_RGB16_LITTLE:
753 		surf_depth = 0x00000004;
754 		cmd_depth = 0x00000001;
755 		break;
756 	case B_RGB32_LITTLE:
757 	case B_RGBA32_LITTLE:
758 		surf_depth = 0x00000006;
759 		cmd_depth = 0x00000003;
760 		break;
761 	default:
762 		LOG(8,("ACC_DMA: init, invalid bit depth\n"));
763 		return B_ERROR;
764 	}
765 
766 	/* wait for room in fifo for surface setup cmd if needed */
767 	if (nv_acc_fifofree_dma(5) != B_OK) return B_ERROR;
768 	/* now setup 2D surface (writing 5 32bit words) */
769 	nv_acc_cmd_dma(NV4_SURFACE, NV4_SURFACE_FORMAT, 4);
770 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = surf_depth; /* Format */
771 	/* setup screen pitch */
772 	si->engine.dma.cmdbuffer[si->engine.dma.current++] =
773 		((si->fbc.bytes_per_row & 0x0000ffff) | (si->fbc.bytes_per_row << 16)); /* Pitch */
774 	/* setup screen location */
775 	si->engine.dma.cmdbuffer[si->engine.dma.current++] =
776 		((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer); /* OffsetSource */
777 	si->engine.dma.cmdbuffer[si->engine.dma.current++] =
778 		((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer); /* OffsetDest */
779 
780 	/* wait for room in fifo for pattern colordepth setup cmd if needed */
781 	if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR;
782 	/* set pattern colordepth (writing 2 32bit words) */
783 	nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETCOLORFORMAT, 1);
784 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */
785 
786 	/* wait for room in fifo for bitmap colordepth setup cmd if needed */
787 	if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR;
788 	/* set bitmap colordepth (writing 2 32bit words) */
789 	nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_SETCOLORFORMAT, 1);
790 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = cmd_depth; /* SetColorFormat */
791 
792 	/* tell the engine to fetch and execute all (new) commands in the DMA buffer */
793 	nv_start_dma();
794 
795 	return B_OK;
796 }
797 
798 static void nv_start_dma(void)
799 {
800 	uint8 dummy;
801 
802 	if (si->engine.dma.current != si->engine.dma.put)
803 	{
804 		si->engine.dma.put = si->engine.dma.current;
805 		/* dummy read the first adress of the framebuffer: flushes MTRR-WC buffers so
806 		 * we know for sure the DMA command buffer received all data. */
807 		dummy = *((char *)(si->framebuffer));
808 		/* actually start DMA to execute all commands now in buffer */
809 		/* note:
810 		 * it doesn't matter which FIFO channel's DMA registers we access, they are in
811 		 * fact all the same set. It also doesn't matter if the channel was assigned a
812 		 * command or not. */
813 		/* note also:
814 		 * NV_GENERAL_DMAPUT is a write-only register on some cards (confirmed NV11). */
815 		NV_REG32(NVACC_FIFO + NV_GENERAL_DMAPUT) = (si->engine.dma.put << 2);
816 	}
817 }
818 
819 /* this routine does not check the engine's internal hardware FIFO, but the DMA
820  * command buffer. You can see this as a FIFO as well, that feeds the hardware FIFO.
821  * The hardware FIFO state is checked by the DMA hardware automatically. */
822 static status_t nv_acc_fifofree_dma(uint16 cmd_size)
823 {
824 	uint32 dmaget;
825 
826 	/* we'd better check for timeouts on the DMA engine as it's theoretically
827 	 * breakable by malfunctioning software */
828 	uint16 cnt = 0;
829 
830 	/* check if the DMA buffer has enough room for the command.
831 	 * note:
832 	 * engine.dma.free is 'cached' */
833 	while ((si->engine.dma.free < cmd_size) && (cnt < 10000) && (err < 3))
834 	{
835 		/* see where the engine is currently fetching from the buffer */
836 		/* note:
837 		 * read this only once in the code as accessing registers is relatively slow */
838 		/* note also:
839 		 * it doesn't matter which FIFO channel's DMA registers we access, they are in
840 		 * fact all the same set. It also doesn't matter if the channel was assigned a
841 		 * command or not. */
842 		dmaget = ((NV_REG32(NVACC_FIFO + NV_GENERAL_DMAGET)) >> 2);
843 
844 		/* update timeout counter: on NV11 on a Pentium4 2.8Ghz max reached count
845 		 * using BeRoMeter 1.2.6 was about 600; so counting 10000 before generating
846 		 * a timeout should definately do it. Snooze()-ing cannot be done without a
847 		 * serious speed penalty, even if done for only 1 microSecond. */
848 		cnt++;
849 
850 		/* where's the engine fetching viewed from us issuing? */
851 		if (si->engine.dma.put >= dmaget)
852 		{
853 			/* engine is fetching 'behind us', the last piece of the buffer is free */
854 
855 			/* note the 'updated' free space we have in the DMA buffer */
856 			si->engine.dma.free = si->engine.dma.max - si->engine.dma.current;
857 			/* if it's enough after all we exit this routine immediately. Else: */
858 			if (si->engine.dma.free < cmd_size)
859 			{
860 				/* not enough room left, so instruct DMA engine to reset the buffer
861 				 * when it's reaching the end of it */
862 				si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0x20000000;
863 				/* reset our buffer pointer, so new commands will be placed at the
864 				 * beginning of the buffer. */
865 				si->engine.dma.current = 0;
866 				/* tell the engine to fetch the remaining command(s) in the DMA buffer
867 				 * that where not executed before. */
868 				nv_start_dma();
869 
870 				/* NOW the engine is fetching 'in front of us', so the first piece
871 				 * of the buffer is free */
872 
873 				/* note the updated current free space we have in the DMA buffer */
874 				si->engine.dma.free = dmaget - si->engine.dma.current;
875 				/* mind this pittfall:
876 				 * Leave some room between where the engine is fetching and where we
877 				 * put new commands. Otherwise the engine will crash on heavy loads.
878 				 * A crash can be forced best in 640x480x32 mode with BeRoMeter 1.2.6.
879 				 * (confirmed on NV11 and NV43 with less than 256 words forced freespace.)
880 				 * Note:
881 				 * The engine is DMA triggered for fetching chunks every 128 bytes,
882 				 * maybe this is the reason for this behaviour.
883 				 * Note also:
884 				 * it looks like the space that needs to be kept free is coupled
885 				 * with the size of the DMA buffer. */
886 				if (si->engine.dma.free < 256)
887 					si->engine.dma.free = 0;
888 				else
889 					si->engine.dma.free -= 256;
890 			}
891 		}
892 		else
893 		{
894 			/* engine is fetching 'in front of us', so the first piece of the buffer
895 			 * is free */
896 
897 			/* note the updated current free space we have in the DMA buffer */
898 			si->engine.dma.free = dmaget - si->engine.dma.current;
899 			/* mind this pittfall:
900 			 * Leave some room between where the engine is fetching and where we
901 			 * put new commands. Otherwise the engine will crash on heavy loads.
902 			 * A crash can be forced best in 640x480x32 mode with BeRoMeter 1.2.6.
903 			 * (confirmed on NV11 and NV43 with less than 256 words forced freespace.)
904 			 * Note:
905 			 * The engine is DMA triggered for fetching chunks every 128 bytes,
906 			 * maybe this is the reason for this behaviour.
907 			 * Note also:
908 			 * it looks like the space that needs to be kept free is coupled
909 			 * with the size of the DMA buffer. */
910 			if (si->engine.dma.free < 256)
911 				si->engine.dma.free = 0;
912 			else
913 				si->engine.dma.free -= 256;
914 		}
915 	}
916 
917 	/* log timeout if we had one */
918 	if (cnt == 10000)
919 	{
920 		if (err < 3) err++;
921 		LOG(4,("ACC_DMA: fifofree; DMA timeout #%d, engine trouble!\n", err));
922 	}
923 
924 	/* we must make the acceleration routines abort or the driver will hang! */
925 	if (err >= 3) return B_ERROR;
926 
927 	return B_OK;
928 }
929 
930 static void nv_acc_cmd_dma(uint32 cmd, uint16 offset, uint16 size)
931 {
932 	/* NV_FIFO_DMA_OPCODE: set number of cmd words (b18 - 28); set FIFO offset for
933 	 * first cmd word (b2 - 15); set DMA opcode = method (b29 - 31).
934 	 * a 'NOP' is the opcode word $00000000. */
935 	/* note:
936 	 * possible DMA opcodes:
937 	 * b'000' is 'method' (execute cmd);
938 	 * b'001' is 'jump';
939 	 * b'002' is 'noninc method' (execute buffer wrap-around);
940 	 * b'003' is 'call': return is executed by opcode word $00020000 (b17 = 1). */
941 	/* note also:
942 	 * this system uses auto-increments for the FIFO offset adresses. Make sure
943 	 * to set a new adress if a gap exists between the previous one and the new one. */
944 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = ((size << 18) |
945 		((si->engine.fifo.ch_ptr[cmd] + offset) & 0x0000fffc));
946 
947 	/* space left after issuing the current command is the cmd AND it's arguments less */
948 	si->engine.dma.free -= (size + 1);
949 }
950 
951 static void nv_acc_set_ch_dma(uint16 ch, uint32 handle)
952 {
953 	/* issue FIFO channel assign cmd */
954 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = ((1 << 18) | ch);
955 	/* set new assignment */
956 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = (0x80000000 | handle);
957 
958 	/* space left after issuing the current command is the cmd AND it's arguments less */
959 	si->engine.dma.free -= 2;
960 }
961 
962 /* fixme? (check this out..)
963  * Looks like this stuff can be very much simplified and speed-up, as it seems it's not
964  * nessesary to wait for the engine to become idle before re-assigning channels.
965  * Because the cmd handles are actually programmed _inside_ the fifo channels, it might
966  * well be that the assignment is buffered along with the commands that still have to
967  * be executed!
968  * (sounds very plausible to me :) */
969 void nv_acc_assert_fifo_dma(void)
970 {
971 	/* does every engine cmd this accelerant needs have a FIFO channel? */
972 	//fixme: can probably be optimized for both speed and channel selection...
973 	if (!si->engine.fifo.ch_ptr[NV_ROP5_SOLID] ||
974 		!si->engine.fifo.ch_ptr[NV_IMAGE_BLACK_RECTANGLE] ||
975 		!si->engine.fifo.ch_ptr[NV_IMAGE_PATTERN] ||
976 		!si->engine.fifo.ch_ptr[NV4_SURFACE] ||
977 		!si->engine.fifo.ch_ptr[NV_IMAGE_BLIT] ||
978 		!si->engine.fifo.ch_ptr[NV4_GDI_RECTANGLE_TEXT])
979 	{
980 		uint16 cnt;
981 
982 		/* no, wait until the engine is idle before re-assigning the FIFO */
983 		nv_acc_wait_idle_dma();
984 
985 		/* free the FIFO channels we want from the currently assigned cmd's */
986 		si->engine.fifo.ch_ptr[si->engine.fifo.handle[0]] = 0;
987 		si->engine.fifo.ch_ptr[si->engine.fifo.handle[1]] = 0;
988 		si->engine.fifo.ch_ptr[si->engine.fifo.handle[2]] = 0;
989 		si->engine.fifo.ch_ptr[si->engine.fifo.handle[3]] = 0;
990 		si->engine.fifo.ch_ptr[si->engine.fifo.handle[4]] = 0;
991 		si->engine.fifo.ch_ptr[si->engine.fifo.handle[5]] = 0;
992 
993 		/* set new object handles */
994 		si->engine.fifo.handle[0] = NV_ROP5_SOLID;
995 		si->engine.fifo.handle[1] = NV_IMAGE_BLACK_RECTANGLE;
996 		si->engine.fifo.handle[2] = NV_IMAGE_PATTERN;
997 		si->engine.fifo.handle[3] = NV4_SURFACE;
998 		si->engine.fifo.handle[4] = NV_IMAGE_BLIT;
999 		si->engine.fifo.handle[5] = NV4_GDI_RECTANGLE_TEXT;
1000 
1001 		/* set handle's pointers to their assigned FIFO channels */
1002 		/* note:
1003 		 * b0-1 aren't used as adressbits. Using b0 to indicate a valid pointer. */
1004 		for (cnt = 0; cnt < 0x08; cnt++)
1005 		{
1006 			si->engine.fifo.ch_ptr[(si->engine.fifo.handle[cnt])] =
1007 				(0x00000001 + (cnt * 0x00002000));
1008 		}
1009 
1010 		/* wait for room in fifo for new FIFO assigment cmds if needed. */
1011 		if (nv_acc_fifofree_dma(12) != B_OK) return;
1012 
1013 		/* program new FIFO assignments */
1014 		/* Raster OPeration: */
1015 		nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH0, si->engine.fifo.handle[0]);
1016 		/* Clip: */
1017 		nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH1, si->engine.fifo.handle[1]);
1018 		/* Pattern: */
1019 		nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH2, si->engine.fifo.handle[2]);
1020 		/* 2D Surface: */
1021 		nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH3, si->engine.fifo.handle[3]);
1022 		/* Blit: */
1023 		nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH4, si->engine.fifo.handle[4]);
1024 		/* Bitmap: */
1025 		nv_acc_set_ch_dma(NV_GENERAL_FIFO_CH5, si->engine.fifo.handle[5]);
1026 
1027 		/* tell the engine to fetch and execute all (new) commands in the DMA buffer */
1028 		nv_start_dma();
1029 	}
1030 }
1031 
1032 /* screen to screen blit - i.e. move windows around and scroll within them. */
1033 status_t nv_acc_setup_blit_dma()
1034 {
1035 	/* setup solid pattern:
1036 	 * wait for room in fifo for pattern cmd if needed. */
1037 	if (nv_acc_fifofree_dma(7) != B_OK) return B_ERROR;
1038 	/* now setup pattern (writing 7 32bit words) */
1039 	nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETSHAPE, 1);
1040 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0x00000000; /* SetShape: 0 = 8x8, 1 = 64x1, 2 = 1x64 */
1041 	nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETCOLOR0, 4);
1042 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetColor0 */
1043 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetColor1 */
1044 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetPattern[0] */
1045 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetPattern[1] */
1046 	/* ROP registers (Raster OPeration):
1047 	 * wait for room in fifo for ROP cmd if needed. */
1048 	if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR;
1049 
1050 	/* now setup ROP (writing 2 32bit words) for GXcopy */
1051 	nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1);
1052 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xcc; /* SetRop5 */
1053 
1054 	return B_OK;
1055 }
1056 
1057 status_t nv_acc_blit_dma(uint16 xs,uint16 ys,uint16 xd,uint16 yd,uint16 w,uint16 h)
1058 {
1059 	/* Note: blit-copy direction is determined inside riva hardware: no setup needed */
1060 
1061 	/* instruct engine what to blit:
1062 	 * wait for room in fifo for blit cmd if needed. */
1063 	if (nv_acc_fifofree_dma(4) != B_OK) return B_ERROR;
1064 	/* now setup blit (writing 4 32bit words) */
1065 	nv_acc_cmd_dma(NV_IMAGE_BLIT, NV_IMAGE_BLIT_SOURCEORG, 3);
1066 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = ((ys << 16) | xs); /* SourceOrg */
1067 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = ((yd << 16) | xd); /* DestOrg */
1068 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = (((h + 1) << 16) | (w + 1)); /* HeightWidth */
1069 
1070 	/* tell the engine to fetch the commands in the DMA buffer that where not
1071 	 * executed before. At this time the setup done by nv_acc_setup_blit_dma() is
1072 	 * also executed on the first call of nv_acc_blit_dma(). */
1073 	nv_start_dma();
1074 
1075 	return B_OK;
1076 }
1077 
1078 /* rectangle fill - i.e. workspace and window background color */
1079 /* span fill - i.e. (selected) menuitem background color (Dano) */
1080 status_t nv_acc_setup_rectangle_dma(uint32 color)
1081 {
1082 	/* setup solid pattern:
1083 	 * wait for room in fifo for pattern cmd if needed. */
1084 	if (nv_acc_fifofree_dma(7) != B_OK) return B_ERROR;
1085 	/* now setup pattern (writing 7 32bit words) */
1086 	nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETSHAPE, 1);
1087 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0x00000000; /* SetShape: 0 = 8x8, 1 = 64x1, 2 = 1x64 */
1088 	nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETCOLOR0, 4);
1089 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetColor0 */
1090 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetColor1 */
1091 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetPattern[0] */
1092 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetPattern[1] */
1093 
1094 	/* ROP registers (Raster OPeration):
1095 	 * wait for room in fifo for ROP cmd if needed. */
1096 	if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR;
1097 	/* now setup ROP (writing 2 32bit words) for GXcopy */
1098 	nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1);
1099 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xcc; /* SetRop5 */
1100 
1101 	/* setup fill color:
1102 	 * wait for room in fifo for bitmap cmd if needed. */
1103 	if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR;
1104 	/* now setup color (writing 2 32bit words) */
1105 	nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1);
1106 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = color; /* Color1A */
1107 
1108 	return B_OK;
1109 }
1110 
1111 status_t nv_acc_rectangle_dma(uint32 xs,uint32 xe,uint32 ys,uint32 yl)
1112 {
1113 	/* instruct engine what to fill:
1114 	 * wait for room in fifo for bitmap cmd if needed. */
1115 	if (nv_acc_fifofree_dma(3) != B_OK) return B_ERROR;
1116 	/* now setup fill (writing 3 32bit words) */
1117 	nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, 2);
1118 	si->engine.dma.cmdbuffer[si->engine.dma.current++] =
1119 		((xs << 16) | (ys & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */
1120 	si->engine.dma.cmdbuffer[si->engine.dma.current++] =
1121 		(((xe - xs) << 16) | (yl & 0x0000ffff)); /* Unclipped Rect 0 WidthHeight */
1122 
1123 	/* tell the engine to fetch the commands in the DMA buffer that where not
1124 	 * executed before. At this time the setup done by nv_acc_setup_rectangle_dma() is
1125 	 * also executed on the first call of nv_acc_rectangle_dma(). */
1126 	nv_start_dma();
1127 
1128 	return B_OK;
1129 }
1130 
1131 /* rectangle invert - i.e. text cursor and text selection */
1132 status_t nv_acc_setup_rect_invert_dma()
1133 {
1134 	/* setup solid pattern:
1135 	 * wait for room in fifo for pattern cmd if needed. */
1136 	if (nv_acc_fifofree_dma(7) != B_OK) return B_ERROR;
1137 	/* now setup pattern (writing 7 32bit words) */
1138 	nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETSHAPE, 1);
1139 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0x00000000; /* SetShape: 0 = 8x8, 1 = 64x1, 2 = 1x64 */
1140 	nv_acc_cmd_dma(NV_IMAGE_PATTERN, NV_IMAGE_PATTERN_SETCOLOR0, 4);
1141 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetColor0 */
1142 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetColor1 */
1143 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetPattern[0] */
1144 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0xffffffff; /* SetPattern[1] */
1145 
1146 	/* ROP registers (Raster OPeration):
1147 	 * wait for room in fifo for ROP cmd if needed. */
1148 	if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR;
1149 	/* now setup ROP (writing 2 32bit words) for GXinvert */
1150 	nv_acc_cmd_dma(NV_ROP5_SOLID, NV_ROP5_SOLID_SETROP5, 1);
1151 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0x55; /* SetRop5 */
1152 
1153 	/* reset fill color:
1154 	 * wait for room in fifo for bitmap cmd if needed. */
1155 	if (nv_acc_fifofree_dma(2) != B_OK) return B_ERROR;
1156 	/* now reset color (writing 2 32bit words) */
1157 	nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_COLOR1A, 1);
1158 	si->engine.dma.cmdbuffer[si->engine.dma.current++] = 0x00000000; /* Color1A */
1159 
1160 	return B_OK;
1161 }
1162 
1163 status_t nv_acc_rectangle_invert_dma(uint32 xs,uint32 xe,uint32 ys,uint32 yl)
1164 {
1165 	/* instruct engine what to fill:
1166 	 * wait for room in fifo for bitmap cmd if needed. */
1167 	if (nv_acc_fifofree_dma(3) != B_OK) return B_ERROR;
1168 	/* now setup fill (writing 3 32bit words) */
1169 	nv_acc_cmd_dma(NV4_GDI_RECTANGLE_TEXT, NV4_GDI_RECTANGLE_TEXT_UCR0_LEFTTOP, 2);
1170 	si->engine.dma.cmdbuffer[si->engine.dma.current++] =
1171 		((xs << 16) | (ys & 0x0000ffff)); /* Unclipped Rect 0 LeftTop */
1172 	si->engine.dma.cmdbuffer[si->engine.dma.current++] =
1173 		(((xe - xs) << 16) | (yl & 0x0000ffff)); /* Unclipped Rect 0 WidthHeight */
1174 
1175 	/* tell the engine to fetch the commands in the DMA buffer that where not
1176 	 * executed before. At this time the setup done by nv_acc_setup_rectangle_dma() is
1177 	 * also executed on the first call of nv_acc_rectangle_dma(). */
1178 	nv_start_dma();
1179 
1180 	return B_OK;
1181 }
1182