xref: /haiku/src/add-ons/accelerants/nvidia/engine/nv_acc.c (revision 893988af824e65e49e55f517b157db8386e8002b)
1 /* NV Acceleration functions */
2 /* Author:
3    Rudolf Cornelissen 8/2003-5/2009.
4 
5    This code was possible thanks to:
6     - the Linux XFree86 NV driver,
7     - the Linux UtahGLX 3D driver.
8 */
9 
10 /*
11 	note:
12 	Can't get NV40 and higher going using this PIO mode acceleration system ATM.
13 	Here's the problem:
14 	The FIFO is not functioning correctly: the proof of this is that you can only
15 	read out the PIO FIFO fill-level register (FifoFree) once before it stops responding
16 	(it returns only zeros on all reads after the first one).
17 	You can see the issued commands are actually placed in the FIFO because the first
18 	read of FifoFree corresponds to what you'd expect.
19 	There is no visual confirmation of any command actually being executed by the
20 	acceleration engine, so we don't know if the FIFO places commands in the engine.
21 	BTW:
22 	The FifoFree register exhibits the exact same behaviour in DMA acceleration mode.
23 	It's no problem there because we use the DMAPut and DMAGet registers instead.
24 
25 	The non-functioning Fifo in PIO mode might have one of these reasons:
26 	- lack of specs: maybe additional programming is required.
27 	- hardware fault: as probably no-one uses PIO mode acceleration anymore these days,
28 	  nVidia might not care about this any longer.
29 
30 	note also:
31 	Keeping this PIO mode acceleration stuff here for now to guarantee compatibility
32 	with current 3D acceleration attempts: the utahGLX 3D driver cooperated with the
33 	PIO mode acceleration functions in the XFree drivers (up to and including XFree 4.2.0).
34 */
35 
36 #define MODULE_BIT 0x00080000
37 
38 #include "nv_std.h"
39 
40 static void nv_init_for_3D(void);
41 
42 /*acceleration notes*/
43 
44 /*functions Be's app_server uses:
45 fill span (horizontal only)
46 fill rectangle (these 2 are very similar)
47 invert rectangle
48 blit
49 */
50 
51 /*
52 	nVidia hardware info:
53 	We should be able to do FIFO assignment setup changes on-the-fly now, using
54 	all the engine-command-handles that are pre-defined on any FIFO channel.
55 	Also we should be able to setup new additional handles to previously unused
56 	engine commands now.
57 */
58 
59 /* FIFO channel pointers */
60 /* note:
61  * every instance of the accelerant needs to have its own pointers, as the registers
62  * are cloned to different address ranges for each one */
63 static cmd_nv_rop5_solid* nv_rop5_solid_ptr;
64 static cmd_nv_image_black_rectangle* nv_image_black_rectangle_ptr;
65 static cmd_nv_image_pattern* nv_image_pattern_ptr;
66 static cmd_nv_image_blit* nv_image_blit_ptr;
67 static cmd_nv3_gdi_rectangle_text* nv3_gdi_rectangle_text_ptr;
68 
69 status_t nv_acc_wait_idle()
70 {
71 	/* wait until engine completely idle */
72 	while (ACCR(STATUS))
73 	{
74 		/* snooze a bit so I do not hammer the bus */
75 		snooze (100);
76 	}
77 
78 	return B_OK;
79 }
80 
81 /* AFAIK this must be done for every new screenmode.
82  * Engine required init. */
83 status_t nv_acc_init()
84 {
85 	uint16 cnt;
86 
87 	/* a hanging engine only recovers from a complete power-down/power-up cycle */
88 	NV_REG32(NV32_PWRUPCTRL) = 0xffff00ff;
89 	snooze(1000);
90 	NV_REG32(NV32_PWRUPCTRL) = 0xffffffff;
91 
92 	/* setup PTIMER: */
93 	//fixme? how about NV28 setup as just after coldstarting? (see nv_info.c)
94 	/* set timer numerator to 8 (in b0-15) */
95 	ACCW(PT_NUMERATOR, 0x00000008);
96 	/* set timer denominator to 3 (in b0-15) */
97 	ACCW(PT_DENOMINATR, 0x00000003);
98 
99 	/* disable timer-alarm INT requests (b0) */
100 	ACCW(PT_INTEN, 0x00000000);
101 	/* reset timer-alarm INT status bit (b0) */
102 	ACCW(PT_INTSTAT, 0xffffffff);
103 
104 	/* enable PRAMIN write access on pre NV10 before programming it! */
105 	if (si->ps.card_arch == NV04A)
106 	{
107 		/* set framebuffer config: type = notiling, PRAMIN write access enabled */
108 		NV_REG32(NV32_PFB_CONFIG_0) = 0x00001114;
109 	}
110 
111 	/*** PFIFO ***/
112 	/* (setup caches) */
113 	/* disable caches reassign */
114 	ACCW(PF_CACHES, 0x00000000);
115 	/* PFIFO mode for all 32 channels is PIO (instead of DMA) */
116 	ACCW(PF_MODE, 0x00000000);
117 	/* cache1 push0 access disabled */
118 	ACCW(PF_CACH1_PSH0, 0x00000000);
119 	/* cache1 pull0 access disabled */
120 	ACCW(PF_CACH1_PUL0, 0x00000000);
121 	/* cache1 push1 mode = pio (disable DMA use) */
122 	ACCW(PF_CACH1_PSH1, 0x00000000);
123 	/* cache1 DMA Put offset = 0 (b2-28) */
124 	ACCW(PF_CACH1_DMAP, 0x00000000);
125 	/* cache1 DMA Get offset = 0 (b2-28) */
126 	ACCW(PF_CACH1_DMAG, 0x00000000);
127 	/* cache1 DMA instance adress = none (b0-15);
128 	 * instance being b4-19 with baseadress NV_PRAMIN_CTX_0 (0x00700000). */
129 	/* note:
130 	 * should point to a DMA definition in CTX register space (which is sort of RAM).
131 	 * This define tells the engine where the DMA cmd buffer is and what it's size is.
132 	 * Inside that cmd buffer you'll find the actual issued engine commands. */
133 	ACCW(PF_CACH1_DMAI, 0x00000000);
134 	/* cache0 push0 access disabled */
135 	ACCW(PF_CACH0_PSH0, 0x00000000);
136 	/* cache0 pull0 access disabled */
137 	ACCW(PF_CACH0_PUL0, 0x00000000);
138 	/* RAM HT (hash table) baseadress = $10000 (b4-8), size = 4k,
139 	 * search = 128 (is byte offset between hash 'sets') */
140 	/* note:
141 	 * so HT base is $00710000, last is $00710fff.
142 	 * In this space you define the engine command handles (HT_HANDL_XX), which
143 	 * in turn points to the defines in CTX register space (which is sort of RAM) */
144 	ACCW(PF_RAMHT, 0x03000100);
145 	/* RAM FC baseadress = $11000 (b3-8) (size is fixed to 0.5k(?)) */
146 	/* note:
147 	 * so FC base is $00711000, last is $007111ff. (not used?) */
148 	ACCW(PF_RAMFC, 0x00000110);
149 	/* RAM RO baseadress = $11200 (b1-8), size = 0.5k */
150 	/* note:
151 	 * so RO base is $00711200, last is $007113ff. (not used?) */
152 	/* note also:
153 	 * This means(?) the PRAMIN CTX registers are accessible from base $00711400. */
154 	ACCW(PF_RAMRO, 0x00000112);
155 	/* PFIFO size: ch0-15 = 512 bytes, ch16-31 = 124 bytes */
156 	ACCW(PF_SIZE, 0x0000ffff);
157 	/* cache1 hash instance = $ffff (b0-15) */
158 	ACCW(PF_CACH1_HASH, 0x0000ffff);
159 	/* disable all PFIFO INTs */
160 	ACCW(PF_INTEN, 0x00000000);
161 	/* reset all PFIFO INT status bits */
162 	ACCW(PF_INTSTAT, 0xffffffff);
163 	/* cache0 pull0 engine = acceleration engine (graphics) */
164 	ACCW(PF_CACH0_PUL1, 0x00000001);
165 	/* cache1 DMA control: disable some stuff */
166 	ACCW(PF_CACH1_DMAC, 0x00000000);
167 	/* cache1 engine 0 upto/including 7 is software (could also be graphics or DVD) */
168 	ACCW(PF_CACH1_ENG, 0x00000000);
169 	/* cache1 DMA fetch: trigger at 128 bytes, size is 32 bytes, max requests is 15,
170 	 * use little endian */
171 	ACCW(PF_CACH1_DMAF, 0x000f0078);
172 	/* cache1 DMA push: b0=0 is access disabled */
173 	ACCW(PF_CACH1_DMAS, 0x00000000);
174 	/* cache1 push0 access enabled */
175 	ACCW(PF_CACH1_PSH0, 0x00000001);
176 	/* cache1 pull0 access enabled */
177 	ACCW(PF_CACH1_PUL0, 0x00000001);
178 	/* cache1 pull1 engine = acceleration engine (graphics) */
179 	ACCW(PF_CACH1_PUL1, 0x00000001);
180 	/* enable PFIFO caches reassign */
181 	ACCW(PF_CACHES, 0x00000001);
182 
183 	/*** PRAMIN ***/
184 	/* first clear the entire RAMHT (hash-table) space to a defined state. It turns
185 	 * out at least NV11 will keep the previously programmed handles over resets and
186 	 * power-outages upto about 15 seconds!! Faulty entries might well hang the
187 	 * engine (confirmed on NV11).
188 	 * Note:
189 	 * this behaviour is not very strange: even very old DRAM chips are known to be
190 	 * able to do this, even though you should refresh them every few milliseconds or
191 	 * so. (Large memory cell capacitors, though different cells vary a lot in their
192 	 * capacity.)
193 	 * Of course data validity is not certain by a long shot over this large
194 	 * amount of time.. */
195 	for(cnt = 0; cnt < 0x0400; cnt++)
196 		NV_REG32(NVACC_HT_HANDL_00 + (cnt << 2)) = 0;
197 	/* RAMHT space (hash-table) SETUP FIFO HANDLES */
198 	/* note:
199 	 * 'instance' tells you where the engine command is stored in 'PR_CTXx_x' sets
200 	 * below: instance being b4-19 with baseadress NV_PRAMIN_CTX_0 (0x00700000).
201 	 * That command is linked to the handle noted here. This handle is then used to
202 	 * tell the FIFO to which engine command it is connected!
203 	 * (CTX registers are actually a sort of RAM space.) */
204 	if (si->ps.card_arch >= NV40A)
205 	{
206 		/* (first set) */
207 		ACCW(HT_HANDL_00, (0x80000000 | NV10_CONTEXT_SURFACES_2D)); /* 32bit handle (not used) */
208 		ACCW(HT_VALUE_00, 0x0010114c); /* instance $114c, engine = acc engine, CHID = $00 */
209 
210 		ACCW(HT_HANDL_01, (0x80000000 | NV_IMAGE_BLIT)); /* 32bit handle */
211 		ACCW(HT_VALUE_01, 0x00101148); /* instance $1148, engine = acc engine, CHID = $00 */
212 
213 		ACCW(HT_HANDL_02, (0x80000000 | NV4_GDI_RECTANGLE_TEXT)); /* 32bit handle */
214 		ACCW(HT_VALUE_02, 0x0010114a); /* instance $114a, engine = acc engine, CHID = $00 */
215 
216 		/* (second set) */
217 		ACCW(HT_HANDL_10, (0x80000000 | NV_ROP5_SOLID)); /* 32bit handle */
218 		ACCW(HT_VALUE_10, 0x00101142); /* instance $1142, engine = acc engine, CHID = $00 */
219 
220 		ACCW(HT_HANDL_11, (0x80000000 | NV_IMAGE_BLACK_RECTANGLE)); /* 32bit handle */
221 		ACCW(HT_VALUE_11, 0x00101144); /* instance $1144, engine = acc engine, CHID = $00 */
222 
223 		ACCW(HT_HANDL_12, (0x80000000 | NV_IMAGE_PATTERN)); /* 32bit handle */
224 		ACCW(HT_VALUE_12, 0x00101146); /* instance $1146, engine = acc engine, CHID = $00 */
225 	}
226 	else
227 	{
228 		/* (first set) */
229 		ACCW(HT_HANDL_00, (0x80000000 | NV4_SURFACE)); /* 32bit handle */
230 		ACCW(HT_VALUE_00, 0x80011145); /* instance $1145, engine = acc engine, CHID = $00 */
231 
232 		ACCW(HT_HANDL_01, (0x80000000 | NV_IMAGE_BLIT)); /* 32bit handle */
233 		ACCW(HT_VALUE_01, 0x80011146); /* instance $1146, engine = acc engine, CHID = $00 */
234 
235 		ACCW(HT_HANDL_02, (0x80000000 | NV4_GDI_RECTANGLE_TEXT)); /* 32bit handle */
236 		ACCW(HT_VALUE_02, 0x80011147); /* instance $1147, engine = acc engine, CHID = $00 */
237 
238 		ACCW(HT_HANDL_03, (0x80000000 | NV4_CONTEXT_SURFACES_ARGB_ZS)); /* 32bit handle (3D) */
239 		ACCW(HT_VALUE_03, 0x80011148); /* instance $1148, engine = acc engine, CHID = $00 */
240 
241 		/* NV4_ and NV10_DX5_TEXTURE_TRIANGLE should be identical */
242 		ACCW(HT_HANDL_04, (0x80000000 | NV4_DX5_TEXTURE_TRIANGLE)); /* 32bit handle (3D) */
243 		ACCW(HT_VALUE_04, 0x80011149); /* instance $1149, engine = acc engine, CHID = $00 */
244 
245 		/* NV4_ and NV10_DX6_MULTI_TEXTURE_TRIANGLE should be identical */
246 		ACCW(HT_HANDL_05, (0x80000000 | NV4_DX6_MULTI_TEXTURE_TRIANGLE)); /* 32bit handle (not used) */
247 		ACCW(HT_VALUE_05, 0x8001114a); /* instance $114a, engine = acc engine, CHID = $00 */
248 
249 		ACCW(HT_HANDL_06, (0x80000000 | NV1_RENDER_SOLID_LIN)); /* 32bit handle (not used) */
250 		ACCW(HT_VALUE_06, 0x8001114b); /* instance $114b, engine = acc engine, CHID = $00 */
251 
252 		/* (second set) */
253 		ACCW(HT_HANDL_10, (0x80000000 | NV_ROP5_SOLID)); /* 32bit handle */
254 		ACCW(HT_VALUE_10, 0x80011142); /* instance $1142, engine = acc engine, CHID = $00 */
255 
256 		ACCW(HT_HANDL_11, (0x80000000 | NV_IMAGE_BLACK_RECTANGLE)); /* 32bit handle */
257 		ACCW(HT_VALUE_11, 0x80011143); /* instance $1143, engine = acc engine, CHID = $00 */
258 
259 		ACCW(HT_HANDL_12, (0x80000000 | NV_IMAGE_PATTERN)); /* 32bit handle */
260 		ACCW(HT_VALUE_12, 0x80011144); /* instance $1144, engine = acc engine, CHID = $00 */
261 	}
262 
263 	/* program CTX registers: CTX1 is mostly done later (colorspace dependant) */
264 	/* note:
265 	 * CTX determines which HT handles point to what engine commands. */
266 	/* note also:
267 	 * CTX registers are in fact in the same GPU internal RAM space as the engine's
268 	 * hashtable. This means that stuff programmed in here also survives resets and
269 	 * power-outages! (confirmed NV11) */
270 	if (si->ps.card_arch >= NV40A)
271 	{
272 		/* setup a DMA define for use by command defines below. */
273 		ACCW(PR_CTX0_R, 0x00003000); /* DMA page table present and of linear type;
274 									  * DMA target node is NVM (non-volatile memory?)
275 									  * (instead of doing PCI or AGP transfers) */
276 		ACCW(PR_CTX1_R, (si->ps.memory_size - 1)); /* DMA limit: size is all cardRAM */
277 		ACCW(PR_CTX2_R, ((0x00000000 & 0xfffff000) | 0x00000002));
278 									 /* DMA access type is READ_AND_WRITE;
279 									  * memory starts at start of cardRAM (b12-31):
280 									  * It's adress needs to be at a 4kb boundary! */
281 		ACCW(PR_CTX3_R, 0x00000002); /* unknown (looks like this is rubbish/not needed?) */
282 		/* setup set '0' for cmd NV_ROP5_SOLID */
283 		ACCW(PR_CTX0_0, 0x02080043); /* NVclass $043, patchcfg ROP_AND, nv10+: little endian */
284 		ACCW(PR_CTX2_0, 0x00000000); /* DMA0 and DMA1 instance invalid */
285 		ACCW(PR_CTX3_0, 0x00000000); /* method traps disabled */
286 		ACCW(PR_CTX0_1, 0x00000000); /* extra */
287 		ACCW(PR_CTX1_1, 0x00000000); /* extra */
288 		/* setup set '1' for cmd NV_IMAGE_BLACK_RECTANGLE */
289 		ACCW(PR_CTX0_2, 0x02080019); /* NVclass $019, patchcfg ROP_AND, nv10+: little endian */
290 		ACCW(PR_CTX2_2, 0x00000000); /* DMA0 and DMA1 instance invalid */
291 		ACCW(PR_CTX3_2, 0x00000000); /* method traps disabled */
292 		ACCW(PR_CTX0_3, 0x00000000); /* extra */
293 		ACCW(PR_CTX1_3, 0x00000000); /* extra */
294 		/* setup set '2' for cmd NV_IMAGE_PATTERN */
295 		ACCW(PR_CTX0_4, 0x02080018); /* NVclass $018, patchcfg ROP_AND, nv10+: little endian */
296 		ACCW(PR_CTX2_4, 0x00000000); /* DMA0 and DMA1 instance invalid */
297 		ACCW(PR_CTX3_4, 0x00000000); /* method traps disabled */
298 		ACCW(PR_CTX0_5, 0x00000000); /* extra */
299 		ACCW(PR_CTX1_5, 0x00000000); /* extra */
300 		/* setup set '4' for cmd NV_IMAGE_BLIT */
301 		ACCW(PR_CTX0_6, 0x0208005f); /* NVclass $05f, patchcfg ROP_AND, nv10+: little endian */
302 		ACCW(PR_CTX2_6, 0x00000000); /* DMA0 and DMA1 instance invalid */
303 		ACCW(PR_CTX3_6, 0x00000000); /* method traps disabled */
304 		ACCW(PR_CTX0_7, 0x00000000); /* extra */
305 		ACCW(PR_CTX1_7, 0x00000000); /* extra */
306 		/* setup set '5' for cmd NV4_GDI_RECTANGLE_TEXT */
307 		ACCW(PR_CTX0_8, 0x0208004a); /* NVclass $04a, patchcfg ROP_AND, nv10+: little endian */
308 		ACCW(PR_CTX2_8, 0x00000000); /* DMA0 and DMA1 instance invalid */
309 		ACCW(PR_CTX3_8, 0x00000000); /* method traps disabled */
310 		ACCW(PR_CTX0_9, 0x00000000); /* extra */
311 		ACCW(PR_CTX1_9, 0x00000000); /* extra */
312 		/* setup set '6' for cmd NV10_CONTEXT_SURFACES_2D */
313 		ACCW(PR_CTX0_A, 0x02080062); /* NVclass $062, nv10+: little endian */
314 		ACCW(PR_CTX2_A, 0x00001140); /* DMA0 instance is $1140, DMA1 instance invalid */
315 		ACCW(PR_CTX3_A, 0x00001140); /* method trap 0 is $1140, trap 1 disabled */
316 		ACCW(PR_CTX0_B, 0x00000000); /* extra */
317 		ACCW(PR_CTX1_B, 0x00000000); /* extra */
318 	}
319 	else
320 	{
321 		/* setup a DMA define for use by command defines below.
322 		 * (would currently be used by CTX 'sets' 0x6 upto/including 0xe: 3D stuff.) */
323 		ACCW(PR_CTX0_R, 0x00003000); /* DMA page table present and of linear type;
324 									  * DMA target node is NVM (non-volatile memory?)
325 									  * (instead of doing PCI or AGP transfers) */
326 		ACCW(PR_CTX1_R, (si->ps.memory_size - 1)); /* DMA limit: size is all cardRAM */
327 		ACCW(PR_CTX2_R, ((0x00000000 & 0xfffff000) | 0x00000002));
328 									 /* DMA access type is READ_AND_WRITE;
329 									  * memory starts at start of cardRAM (b12-31):
330 									  * It's adress needs to be at a 4kb boundary! */
331 		ACCW(PR_CTX3_R, 0x00000002); /* unknown (looks like this is rubbish/not needed?) */
332 		/* setup set '0' for cmd NV_ROP5_SOLID */
333 		ACCW(PR_CTX0_0, 0x01008043); /* NVclass $043, patchcfg ROP_AND, nv10+: little endian */
334 		ACCW(PR_CTX2_0, 0x00000000); /* DMA0 and DMA1 instance invalid */
335 		ACCW(PR_CTX3_0, 0x00000000); /* method traps disabled */
336 		/* setup set '1' for cmd NV_IMAGE_BLACK_RECTANGLE */
337 		ACCW(PR_CTX0_1, 0x01008019); /* NVclass $019, patchcfg ROP_AND, nv10+: little endian */
338 		ACCW(PR_CTX2_1, 0x00000000); /* DMA0 and DMA1 instance invalid */
339 		ACCW(PR_CTX3_1, 0x00000000); /* method traps disabled */
340 		/* setup set '2' for cmd NV_IMAGE_PATTERN */
341 		ACCW(PR_CTX0_2, 0x01008018); /* NVclass $018, patchcfg ROP_AND, nv10+: little endian */
342 		ACCW(PR_CTX2_2, 0x00000000); /* DMA0 and DMA1 instance invalid */
343 		ACCW(PR_CTX3_2, 0x00000000); /* method traps disabled */
344 //fixme: update 3D add-on and this code for the NV4_SURFACE command.
345 		/* setup set '3' for ... */
346 		if(si->ps.card_arch >= NV10A)
347 		{
348 			/* ... cmd NV10_CONTEXT_SURFACES_2D */
349 			ACCW(PR_CTX0_3, 0x01008062); /* NVclass $062, nv10+: little endian */
350 		}
351 		else
352 		{
353 			/* ... cmd NV4_SURFACE */
354 			ACCW(PR_CTX0_3, 0x01008042); /* NVclass $042, nv10+: little endian */
355 		}
356 		ACCW(PR_CTX1_3, 0x00000000); /* colorspace not set, notify instance invalid (b16-31) */
357 		ACCW(PR_CTX2_3, 0x11401140); /* DMA0 instance is $1140, DMA1 instance invalid */
358 		ACCW(PR_CTX3_3, 0x00000000); /* method trap 0 is $1140, trap 1 disabled */
359 		/* setup set '4' for cmd NV_IMAGE_BLIT */
360 		ACCW(PR_CTX0_4, 0x0100805f); /* NVclass $05f, patchcfg ROP_AND, nv10+: little endian */
361 		ACCW(PR_CTX2_4, 0x00000000); /* DMA0 and DMA1 instance invalid */
362 		ACCW(PR_CTX3_4, 0x00000000); /* method traps disabled */
363 		/* setup set '5' for cmd NV4_GDI_RECTANGLE_TEXT */
364 		ACCW(PR_CTX0_5, 0x0100804a); /* NVclass $04a, patchcfg ROP_AND, nv10+: little endian */
365 		ACCW(PR_CTX2_5, 0x00000000); /* DMA0 and DMA1 instance invalid */
366 		ACCW(PR_CTX3_5, 0x00000000); /* method traps disabled */
367 		/* setup set '6' ... */
368 		if (si->ps.card_arch != NV04A)
369 		{
370 			/* ... for cmd NV10_CONTEXT_SURFACES_ARGB_ZS */
371 			ACCW(PR_CTX0_6, 0x00000093); /* NVclass $093, nv10+: little endian */
372 		}
373 		else
374 		{
375 			/* ... for cmd NV4_CONTEXT_SURFACES_ARGB_ZS */
376 			ACCW(PR_CTX0_6, 0x00000053); /* NVclass $053, nv10+: little endian */
377 		}
378 		ACCW(PR_CTX2_6, 0x11401140); /* DMA0, DMA1 instance = $1140 */
379 		ACCW(PR_CTX3_6, 0x00000000); /* method traps disabled */
380 		/* setup set '7' ... */
381 		if (si->ps.card_arch != NV04A)
382 		{
383 			/* ... for cmd NV10_DX5_TEXTURE_TRIANGLE */
384 			ACCW(PR_CTX0_7, 0x0300a094); /* NVclass $094, patchcfg ROP_AND, userclip enable,
385 										  * context surface0 valid, nv10+: little endian */
386 		}
387 		else
388 		{
389 			/* ... for cmd NV4_DX5_TEXTURE_TRIANGLE */
390 			ACCW(PR_CTX0_7, 0x0300a054); /* NVclass $054, patchcfg ROP_AND, userclip enable,
391 										  * context surface0 valid */
392 		}
393 		ACCW(PR_CTX1_7, 0x00000d01); /* format is A8RGB24, MSB mono */
394 		ACCW(PR_CTX2_7, 0x11401140); /* DMA0, DMA1 instance = $1140 */
395 		ACCW(PR_CTX3_7, 0x00000000); /* method traps disabled */
396 		/* setup set '8' ... */
397 		if (si->ps.card_arch != NV04A)
398 		{
399 			/* ... for cmd NV10_DX6_MULTI_TEXTURE_TRIANGLE (not used) */
400 			ACCW(PR_CTX0_8, 0x0300a095); /* NVclass $095, patchcfg ROP_AND, userclip enable,
401 										  * context surface0 valid, nv10+: little endian */
402 		}
403 		else
404 		{
405 			/* ... for cmd NV4_DX6_MULTI_TEXTURE_TRIANGLE (not used) */
406 			ACCW(PR_CTX0_8, 0x0300a055); /* NVclass $055, patchcfg ROP_AND, userclip enable,
407 										  * context surface0 valid */
408 		}
409 		ACCW(PR_CTX1_8, 0x00000d01); /* format is A8RGB24, MSB mono */
410 		ACCW(PR_CTX2_8, 0x11401140); /* DMA0, DMA1 instance = $1140 */
411 		ACCW(PR_CTX3_8, 0x00000000); /* method traps disabled */
412 		/* setup set '9' for cmd NV1_RENDER_SOLID_LIN (not used) */
413 		ACCW(PR_CTX0_9, 0x0300a01c); /* NVclass $01c, patchcfg ROP_AND, userclip enable,
414 									  * context surface0 valid, nv10+: little endian */
415 		ACCW(PR_CTX2_9, 0x11401140); /* DMA0, DMA1 instance = $1140 */
416 		ACCW(PR_CTX3_9, 0x00000000); /* method traps disabled */
417 //fixme: update 3D add-on and this code for the NV4_SURFACE command.
418 		/* setup set '9' for cmd NV3_SURFACE_0 */
419 //		ACCW(PR_CTX0_9, 0x00000058); /* NVclass $058, nv10+: little endian */
420 //		ACCW(PR_CTX2_9, 0x11401140); /* DMA0, DMA1 instance = $1140 */
421 //		ACCW(PR_CTX3_9, 0x00000000); /* method traps disabled */
422 		/* setup set 'A' for cmd NV3_SURFACE_1 */
423 //		ACCW(PR_CTX0_A, 0x00000059); /* NVclass $059, nv10+: little endian */
424 //		ACCW(PR_CTX2_A, 0x11401140); /* DMA0, DMA1 instance = $1140 */
425 //		ACCW(PR_CTX3_A, 0x00000000); /* method traps disabled */
426 	}
427 
428 	/*** PGRAPH ***/
429 	switch (si->ps.card_arch)
430 	{
431 	case NV40A:
432 		/* set resetstate for most function blocks */
433 		ACCW(DEBUG0, 0x0003ffff);//?
434 		/* init some function blocks */
435 		ACCW(DEBUG1, 0x401287c0);
436 		ACCW(DEBUG2, 0x24f82ad9);//?
437 		ACCW(DEBUG3, 0x60de8051);
438 		/* end resetstate for the function blocks */
439 		ACCW(DEBUG0, 0x00000000);//?
440 		/* disable specific functions, but enable SETUP_SPARE2 register */
441 		ACCW(NV10_DEBUG4, 0x00008000);
442 		/* set limit_viol_pix_adress(?): more likely something unknown.. */
443 		ACCW(NV25_WHAT0, 0x00be3c5f);
444 		/* unknown.. */
445 		switch (si->ps.card_type)
446 		{
447 		case NV40:
448 		case NV45:
449 			ACCW(NV40_WHAT0, 0x83280fff);
450 			ACCW(NV40_WHAT1, 0x000000a0);
451 			ACCW(NV40_WHAT2, 0x0078e366);
452 			ACCW(NV40_WHAT3, 0x0000014c);
453 			break;
454 		case NV41:
455 			ACCW(NV40P_WHAT0, 0x83280eff);
456 			ACCW(NV40P_WHAT1, 0x000000a0);
457 			ACCW(NV40P_WHAT2, 0x007596ff);
458 			ACCW(NV40P_WHAT3, 0x00000108);
459 			break;
460 		case NV43:
461 			ACCW(NV40P_WHAT0, 0x83280eff);
462 			ACCW(NV40P_WHAT1, 0x000000a0);
463 			ACCW(NV40P_WHAT2, 0x0072cb77);
464 			ACCW(NV40P_WHAT3, 0x00000108);
465 			break;
466 		case NV44:
467 			ACCW(NV40P_WHAT0, 0x83280eff);
468 			ACCW(NV40P_WHAT1, 0x000000a0);
469 			ACCW(NV44_WHAT2, 0x00000000);
470 			ACCW(NV44_WHAT3, 0x00000000);
471 			/* unknown.. */
472 			NV_REG32(NV32_NV44_WHAT10) = NV_REG32(NV32_NV10STRAPINFO);
473 			NV_REG32(NV32_NV44_WHAT11) = 0x00000000;
474 			NV_REG32(NV32_NV44_WHAT12) = 0x00000000;
475 			NV_REG32(NV32_NV44_WHAT13) = NV_REG32(NV32_NV10STRAPINFO);
476 			break;
477 		default:
478 			ACCW(NV40P_WHAT0, 0x83280eff);
479 			ACCW(NV40P_WHAT1, 0x000000a0);
480 			break;
481 		}
482 		break;
483 	case NV04A:
484 		/* init some function blocks */
485 		ACCW(DEBUG0, 0x1231c001);
486 		ACCW(DEBUG1, 0x72111101);
487 		ACCW(DEBUG2, 0x11d5f071);
488 		ACCW(DEBUG3, 0x10d4ff31);
489 		break;
490 	default:
491 		/* set resetstate for most function blocks */
492 		ACCW(DEBUG0, 0x0003ffff);
493 		/* init some function blocks */
494 		ACCW(DEBUG1, 0x00118701);
495 		ACCW(DEBUG2, 0x24f82ad9);
496 		ACCW(DEBUG3, 0x55de0030);
497 		/* end resetstate for the function blocks */
498 		ACCW(DEBUG0, 0x00000000);
499 		/* disable specific functions */
500 		ACCW(NV10_DEBUG4, 0);
501 		break;
502 	}
503 
504 	/* reset all cache sets */
505 	ACCW(CACHE1_1, 0);
506 	ACCW(CACHE1_2, 0);
507 	ACCW(CACHE1_3, 0);
508 	ACCW(CACHE1_4, 0);
509 	ACCW(CACHE1_5, 0);
510 	ACCW(CACHE2_1, 0);
511 	ACCW(CACHE2_2, 0);
512 	ACCW(CACHE2_3, 0);
513 	ACCW(CACHE2_4, 0);
514 	ACCW(CACHE2_5, 0);
515 	ACCW(CACHE3_1, 0);
516 	ACCW(CACHE3_2, 0);
517 	ACCW(CACHE3_3, 0);
518 	ACCW(CACHE3_4, 0);
519 	ACCW(CACHE3_5, 0);
520 	ACCW(CACHE4_1, 0);
521 	ACCW(CACHE4_2, 0);
522 	ACCW(CACHE4_3, 0);
523 	ACCW(CACHE4_4, 0);
524 	ACCW(CACHE4_5, 0);
525 	if (si->ps.card_arch != NV04A)
526 		ACCW(NV10_CACHE5_1, 0);
527 	ACCW(CACHE5_2, 0);
528 	ACCW(CACHE5_3, 0);
529 	ACCW(CACHE5_4, 0);
530 	ACCW(CACHE5_5, 0);
531 	if (si->ps.card_arch != NV04A)
532 		ACCW(NV10_CACHE6_1, 0);
533 	ACCW(CACHE6_2, 0);
534 	ACCW(CACHE6_3, 0);
535 	ACCW(CACHE6_4, 0);
536 	ACCW(CACHE6_5, 0);
537 	if (si->ps.card_arch != NV04A)
538 		ACCW(NV10_CACHE7_1, 0);
539 	ACCW(CACHE7_2, 0);
540 	ACCW(CACHE7_3, 0);
541 	ACCW(CACHE7_4, 0);
542 	ACCW(CACHE7_5, 0);
543 	if (si->ps.card_arch != NV04A)
544 		ACCW(NV10_CACHE8_1, 0);
545 	ACCW(CACHE8_2, 0);
546 	ACCW(CACHE8_3, 0);
547 	ACCW(CACHE8_4, 0);
548 	ACCW(CACHE8_5, 0);
549 
550 	if (si->ps.card_arch != NV04A)
551 	{
552 		/* reset (disable) context switch stuff */
553 		ACCW(NV10_CTX_SW1, 0);
554 		ACCW(NV10_CTX_SW2, 0);
555 		ACCW(NV10_CTX_SW3, 0);
556 		ACCW(NV10_CTX_SW4, 0);
557 		ACCW(NV10_CTX_SW5, 0);
558 	}
559 
560 	/* setup accesible card memory range for acc engine */
561 	ACCW(BBASE0, 0x00000000);
562 	ACCW(BBASE1, 0x00000000);
563 	ACCW(BBASE2, 0x00000000);
564 	ACCW(BBASE3, 0x00000000);
565 	ACCW(BLIMIT0, (si->ps.memory_size - 1));
566 	ACCW(BLIMIT1, (si->ps.memory_size - 1));
567 	ACCW(BLIMIT2, (si->ps.memory_size - 1));
568 	ACCW(BLIMIT3, (si->ps.memory_size - 1));
569 	if (si->ps.card_arch >= NV10A)
570 	{
571 		ACCW(NV10_BBASE4, 0x00000000);
572 		ACCW(NV10_BBASE5, 0x00000000);
573 		ACCW(NV10_BLIMIT4, (si->ps.memory_size - 1));
574 		ACCW(NV10_BLIMIT5, (si->ps.memory_size - 1));
575 	}
576 	if (si->ps.card_arch >= NV20A)
577 	{
578 		if ((si->ps.card_type > NV40) && (si->ps.card_type != NV45))
579 		{
580 			ACCW(NV40P_BLIMIT6, (si->ps.memory_size - 1));
581 			ACCW(NV40P_BLIMIT7, (si->ps.memory_size - 1));
582 		}
583 		else
584 		{
585 			/* fixme(?): assuming more BLIMIT registers here: Then how about BBASE6-9? */
586 			ACCW(NV20_BLIMIT6, (si->ps.memory_size - 1));
587 			ACCW(NV20_BLIMIT7, (si->ps.memory_size - 1));
588 			if (si->ps.card_type < NV40)
589 			{
590 				ACCW(NV20_BLIMIT8, (si->ps.memory_size - 1));
591 				ACCW(NV20_BLIMIT9, (si->ps.memory_size - 1));
592 			}
593 		}
594 	}
595 
596 	/* disable all acceleration engine INT reguests */
597 	ACCW(ACC_INTE, 0x00000000);
598 
599 	/* reset all acceration engine INT status bits */
600 	ACCW(ACC_INTS, 0xffffffff);
601 	if (si->ps.card_arch != NV04A)
602 	{
603 		/* context control enabled */
604 		ACCW(NV10_CTX_CTRL, 0x10010100);
605 		/* all acceleration buffers, pitches and colors are valid */
606 		ACCW(NV10_ACC_STAT, 0xffffffff);
607 	}
608 	else
609 	{
610 		/* context control enabled */
611 		ACCW(NV04_CTX_CTRL, 0x10010100);
612 		/* all acceleration buffers, pitches and colors are valid */
613 		ACCW(NV04_ACC_STAT, 0xffffffff);
614 	}
615 	/* enable acceleration engine command FIFO */
616 	ACCW(FIFO_EN, 0x00000001);
617 	/* pattern shape value = 8x8, 2 color */
618 	ACCW(PAT_SHP, 0x00000000);
619 	if (si->ps.card_arch != NV04A)
620 	{
621 		/* surface type is non-swizzle */
622 		ACCW(NV10_SURF_TYP, 0x00000001);
623 	}
624 	else
625 	{
626 		/* surface type is non-swizzle */
627 		ACCW(NV04_SURF_TYP, 0x00000001);
628 	}
629 
630 	/*** Set pixel width and format ***/
631 	//info:
632 	//the BPIXEL register holds the colorspaces for different engine 'contexts' or so.
633 	//B0-3 is 'channel' 0, b4-7 is 'channel '1', etc.
634 	//It looks like we are only using channel 0, so the settings for other channels
635 	//shouldn't matter yet.
636 	//When for instance rect_fill is going to be used on other buffers than the actual
637 	//screen, it's colorspace should be corrected. When the engine is setup in 32bit
638 	//desktop mode for example, the pixel's alpha channel doesn't get touched currently.
639 	//choose mode $d (which is Y32) to get alpha filled too.
640 	switch(si->dm.space)
641 	{
642 	case B_CMAP8:
643 		/* acc engine */
644 		ACCW(FORMATS, 0x00001010);
645 		if (si->ps.card_arch < NV30A)
646 			/* set depth 0-5: $1 = Y8 */
647 			ACCW(BPIXEL, 0x00111111);
648 		else
649 			/* set depth 0-1: $1 = Y8, $2 = X1R5G5B5_Z1R5G5B5 */
650 			ACCW(BPIXEL, 0x00000021);
651 		ACCW(STRD_FMT, 0x03020202);
652 		/* PRAMIN */
653 		if (si->ps.card_arch < NV40A)
654 		{
655 			ACCW(PR_CTX1_0, 0x00000302); /* format is X24Y8, LSB mono */
656 			ACCW(PR_CTX1_1, 0x00000302); /* format is X24Y8, LSB mono */
657 			ACCW(PR_CTX1_2, 0x00000202); /* format is X16A8Y8, LSB mono */
658 			ACCW(PR_CTX1_3, 0x00000302); /* format is X24Y8, LSB mono */
659 			ACCW(PR_CTX1_4, 0x00000302); /* format is X24Y8, LSB mono */
660 			ACCW(PR_CTX1_5, 0x00000302); /* format is X24Y8, LSB mono */
661 			if (si->ps.card_arch == NV04A)
662 			{
663 				ACCW(PR_CTX1_6, 0x00000302); /* format is X24Y8, LSB mono */
664 			}
665 			else
666 			{
667 				ACCW(PR_CTX1_6, 0x00000000); /* format is invalid */
668 			}
669 			ACCW(PR_CTX1_9, 0x00000302); /* format is X24Y8, LSB mono */
670 //fixme: update 3D add-on and this code for the NV4_SURFACE command.
671 //old surf0 and 1:
672 //			ACCW(PR_CTX1_9, 0x00000302); /* format is X24Y8, LSB mono */
673 //			ACCW(PR_CTX2_9, 0x00000302); /* dma_instance 0 valid, instance 1 invalid */
674 		}
675 		else
676 		{
677 			//fixme: select colorspace here (and in other depths), or add
678 			//the appropriate SURFACE command(s).
679 			ACCW(PR_CTX1_0, 0x00000000); /* NV_ROP5_SOLID */
680 			ACCW(PR_CTX1_2, 0x00000000); /* NV_IMAGE_BLACK_RECTANGLE */
681 			ACCW(PR_CTX1_4, 0x02000000); /* NV_IMAGE_PATTERN */
682 			ACCW(PR_CTX1_6, 0x00000000); /* NV_IMAGE_BLIT */
683 			ACCW(PR_CTX1_8, 0x02000000); /* NV4_GDI_RECTANGLE_TEXT */
684 			ACCW(PR_CTX1_A, 0x02000000); /* NV10_CONTEXT_SURFACES_2D */
685 		}
686 		break;
687 	case B_RGB15_LITTLE:
688 		/* acc engine */
689 		ACCW(FORMATS, 0x00002071);
690 		if (si->ps.card_arch < NV30A)
691 			/* set depth 0-5: $2 = X1R5G5B5_Z1R5G5B5, $6 = Y16 */
692 			ACCW(BPIXEL, 0x00226222);
693 		else
694 			/* set depth 0-1: $2 = X1R5G5B5_Z1R5G5B5, $4 = A1R5G5B5 */
695 			ACCW(BPIXEL, 0x00000042);
696 		ACCW(STRD_FMT, 0x09080808);
697 		/* PRAMIN */
698 		ACCW(PR_CTX1_0, 0x00000902); /* format is X17RGB15, LSB mono */
699 		ACCW(PR_CTX1_1, 0x00000902); /* format is X17RGB15, LSB mono */
700 		ACCW(PR_CTX1_2, 0x00000802); /* format is X16A1RGB15, LSB mono */
701 		ACCW(PR_CTX1_3, 0x00000902); /* format is X17RGB15, LSB mono */
702 		ACCW(PR_CTX1_4, 0x00000902); /* format is X17RGB15, LSB mono */
703 		ACCW(PR_CTX1_5, 0x00000902); /* format is X17RGB15, LSB mono */
704 		ACCW(PR_CTX1_6, 0x00000902); /* format is X17RGB15, LSB mono */
705 		ACCW(PR_CTX1_9, 0x00000902); /* format is X17RGB15, LSB mono */
706 //old surf0 and 1:
707 //		ACCW(PR_CTX1_9, 0x00000902); /* format is X17RGB15, LSB mono */
708 //		ACCW(PR_CTX2_9, 0x00000902); /* dma_instance 0 valid, instance 1 invalid */
709 		break;
710 	case B_RGB16_LITTLE:
711 		/* acc engine */
712 		ACCW(FORMATS, 0x000050C2);
713 		if (si->ps.card_arch < NV30A)
714 			/* set depth 0-5: $5 = R5G6B5, $6 = Y16 */
715 			ACCW(BPIXEL, 0x00556555);
716 		else
717 			/* set depth 0-1: $5 = R5G6B5, $a = X1A7R8G8B8_O1A7R8G8B8 */
718 			ACCW(BPIXEL, 0x000000a5);
719 		if (si->ps.card_arch == NV04A)
720 			ACCW(STRD_FMT, 0x0c0b0b0b);
721 		else
722 			ACCW(STRD_FMT, 0x000b0b0c);
723 		/* PRAMIN */
724 		ACCW(PR_CTX1_0, 0x00000c02); /* format is X16RGB16, LSB mono */
725 		ACCW(PR_CTX1_1, 0x00000c02); /* format is X16RGB16, LSB mono */
726 		ACCW(PR_CTX1_2, 0x00000b02); /* format is A16RGB16, LSB mono */
727 		ACCW(PR_CTX1_3, 0x00000c02); /* format is X16RGB16, LSB mono */
728 		ACCW(PR_CTX1_4, 0x00000c02); /* format is X16RGB16, LSB mono */
729 		ACCW(PR_CTX1_5, 0x00000c02); /* format is X16RGB16, LSB mono */
730 		ACCW(PR_CTX1_6, 0x00000c02); /* format is X16RGB16, LSB mono */
731 		ACCW(PR_CTX1_9, 0x00000c02); /* format is X16RGB16, LSB mono */
732 //old surf0 and 1:
733 //		ACCW(PR_CTX1_9, 0x00000c02); /* format is X16RGB16, LSB mono */
734 //		ACCW(PR_CTX2_9, 0x00000c02); /* dma_instance 0 valid, instance 1 invalid */
735 		break;
736 	case B_RGB32_LITTLE:
737 	case B_RGBA32_LITTLE:
738 		/* acc engine */
739 		ACCW(FORMATS, 0x000070e5);
740 		if (si->ps.card_arch < NV30A)
741 			/* set depth 0-5: $7 = X8R8G8B8_Z8R8G8B8, $d = Y32 */
742 			ACCW(BPIXEL, 0x0077d777);
743 		else
744 			/* set depth 0-1: $7 = X8R8G8B8_Z8R8G8B8, $e = V8YB8U8YA8 */
745 			ACCW(BPIXEL, 0x000000e7);
746 		ACCW(STRD_FMT, 0x0e0d0d0d);
747 		/* PRAMIN */
748 		ACCW(PR_CTX1_0, 0x00000e02); /* format is X8RGB24, LSB mono */
749 		ACCW(PR_CTX1_1, 0x00000e02); /* format is X8RGB24, LSB mono */
750 		ACCW(PR_CTX1_2, 0x00000d02); /* format is A8RGB24, LSB mono */
751 		ACCW(PR_CTX1_3, 0x00000e02); /* format is X8RGB24, LSB mono */
752 		ACCW(PR_CTX1_4, 0x00000e02); /* format is X8RGB24, LSB mono */
753 		ACCW(PR_CTX1_5, 0x00000e02); /* format is X8RGB24, LSB mono */
754 		ACCW(PR_CTX1_6, 0x00000e02); /* format is X8RGB24, LSB mono */
755 		ACCW(PR_CTX1_9, 0x00000e02); /* format is X8RGB24, LSB mono */
756 //old surf0 and 1:
757 //		ACCW(PR_CTX1_9, 0x00000e02); /* format is X8RGB24, LSB mono */
758 //		ACCW(PR_CTX2_9, 0x00000e02); /* dma_instance 0 valid, instance 1 invalid */
759 		break;
760 	default:
761 		LOG(8,("ACC: init, invalid bit depth\n"));
762 		return B_ERROR;
763 	}
764 
765 	/* setup some extra stuff for NV30A and later */
766 	if (si->ps.card_arch >= NV30A)
767 	{
768 		/* activate Zcullflush(?) */
769 		ACCW(DEBUG3, (ACCR(DEBUG3) | 0x00000001));
770 		/* unknown */
771 		ACCW(NV25_WHAT1, (ACCR(NV25_WHAT1) | 0x00040000));
772 	}
773 
774 	/*** setup screen location and pitch ***/
775 	switch (si->ps.card_arch)
776 	{
777 	case NV04A:
778 	case NV10A:
779 		/* location of active screen in framebuffer */
780 		/* (confirmed NV05: OFFSET0 is 2D destination buffer offset) */
781 		ACCW(OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
782 		/* (confirmed NV05: OFFSET1 is 2D source buffer offset) */
783 		ACCW(OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
784 		/* (confirmed NV05: OFFSET2 is 3D color buffer offset) */
785 		ACCW(OFFSET2, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
786 		/* (confirmed NV05: OFFSET3 is 3D depth buffer offset) */
787 		ACCW(OFFSET3, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
788 		ACCW(OFFSET4, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
789 		ACCW(OFFSET5, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
790 
791 		/* setup buffer pitch */
792 		/* (confirmed NV05: PITCH0 is 2D destination buffer pitch) */
793 		ACCW(PITCH0, (si->fbc.bytes_per_row & 0x0000ffff));
794 		/* (confirmed NV05: PITCH1 is 2D source buffer pitch) */
795 		ACCW(PITCH1, (si->fbc.bytes_per_row & 0x0000ffff));
796 		/* (confirmed NV05: PITCH2 is 3D color buffer pitch) */
797 		ACCW(PITCH2, (si->fbc.bytes_per_row & 0x0000ffff));
798 		/* (confirmed NV05: PITCH3 is 3D depth buffer pitch) */
799 		ACCW(PITCH3, (si->fbc.bytes_per_row & 0x0000ffff));
800 		ACCW(PITCH4, (si->fbc.bytes_per_row & 0x0000ffff));
801 		break;
802 	case NV20A:
803 	case NV30A:
804 		/* location of active screen in framebuffer */
805 		ACCW(NV20_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
806 		ACCW(NV20_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
807 		ACCW(NV20_OFFSET2, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
808 		ACCW(NV20_OFFSET3, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
809 
810 		/* setup buffer pitch */
811 		ACCW(NV20_PITCH0, (si->fbc.bytes_per_row & 0x0000ffff));
812 		ACCW(NV20_PITCH1, (si->fbc.bytes_per_row & 0x0000ffff));
813 		ACCW(NV20_PITCH2, (si->fbc.bytes_per_row & 0x0000ffff));
814 		ACCW(NV20_PITCH3, (si->fbc.bytes_per_row & 0x0000ffff));
815 		break;
816 	case NV40A:
817 		if ((si->ps.card_type == NV40) || (si->ps.card_type == NV45))
818 		{
819 			/* location of active screen in framebuffer */
820 			ACCW(NV20_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
821 			ACCW(NV20_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
822 			//ACCW(NV20_OFFSET2, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
823 			//ACCW(NV20_OFFSET3, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
824 
825 			/* setup buffer pitch */
826 			//fixme?
827 			ACCW(NV20_PITCH0, (si->fbc.bytes_per_row & 0x0000ffff));
828 			ACCW(NV20_PITCH1, (si->fbc.bytes_per_row & 0x0000ffff));
829 			ACCW(NV20_PITCH2, (si->fbc.bytes_per_row & 0x0000ffff));
830 			ACCW(NV20_PITCH3, (si->fbc.bytes_per_row & 0x0000ffff));
831 		}
832 		else
833 		{
834 			/* location of active screen in framebuffer */
835 			ACCW(NV40P_OFFSET0, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
836 			ACCW(NV40P_OFFSET1, ((uint8*)si->fbc.frame_buffer - (uint8*)si->framebuffer));
837 
838 			/* setup buffer pitch */
839 			//fixme?
840 			ACCW(NV40P_PITCH0, (si->fbc.bytes_per_row & 0x0000ffff));
841 			ACCW(NV40P_PITCH1, (si->fbc.bytes_per_row & 0x0000ffff));
842 		}
843 		break;
844 	}
845 
846 	/*** setup tile and pipe stuff ***/
847 	if (si->ps.card_arch >= NV10A)
848 	{
849 		/* setup acc engine tile stuff: */
850 		/* reset tile adresses */
851 		ACCW(NV10_FBTIL0AD, 0);
852 		ACCW(NV10_FBTIL1AD, 0);
853 		ACCW(NV10_FBTIL2AD, 0);
854 		ACCW(NV10_FBTIL3AD, 0);
855 		ACCW(NV10_FBTIL4AD, 0);
856 		ACCW(NV10_FBTIL5AD, 0);
857 		ACCW(NV10_FBTIL6AD, 0);
858 		ACCW(NV10_FBTIL7AD, 0);
859 		/* copy some RAM configuration info(?) */
860 		if (si->ps.card_arch >= NV20A)
861 		{
862 			if ((si->ps.card_type > NV40) && (si->ps.card_type != NV45))
863 			{
864 				ACCW(NV40P_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0));
865 				ACCW(NV40P_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1));
866 				ACCW(NV40P_WHAT_T2, NV_REG32(NV32_PFB_CONFIG_0));
867 				ACCW(NV40P_WHAT_T3, NV_REG32(NV32_PFB_CONFIG_1));
868 			}
869 			else
870 			{
871 				ACCW(NV20_WHAT_T0, NV_REG32(NV32_PFB_CONFIG_0));
872 				ACCW(NV20_WHAT_T1, NV_REG32(NV32_PFB_CONFIG_1));
873 				if ((si->ps.card_type == NV40) || (si->ps.card_type == NV45))
874 				{
875 					ACCW(NV40_WHAT_T2, NV_REG32(NV32_PFB_CONFIG_0));
876 					ACCW(NV40_WHAT_T3, NV_REG32(NV32_PFB_CONFIG_1));
877 				}
878 			}
879 		}
880 		/* copy tile setup stuff from 'source' to acc engine */
881 		/* tile 0: */
882 		/* tile invalid, tile adress = $00000 (18bit) */
883 		ACCW(NV10_TIL0AD, ACCR(NV10_FBTIL0AD));
884 		/* set tile end adress (18bit) */
885 		ACCW(NV10_TIL0ED, ACCR(NV10_FBTIL0ED));
886 		/* set tile size pitch (8bit: b8-15) */
887 		ACCW(NV10_TIL0PT, ACCR(NV10_FBTIL0PT));
888 		/* set tile status */
889 		ACCW(NV10_TIL0ST, ACCR(NV10_FBTIL0ST));
890 		/* tile 1: */
891 		ACCW(NV10_TIL1AD, ACCR(NV10_FBTIL1AD));
892 		ACCW(NV10_TIL1ED, ACCR(NV10_FBTIL1ED));
893 		ACCW(NV10_TIL1PT, ACCR(NV10_FBTIL1PT));
894 		ACCW(NV10_TIL1ST, ACCR(NV10_FBTIL1ST));
895 		/* tile 2: */
896 		ACCW(NV10_TIL2AD, ACCR(NV10_FBTIL2AD));
897 		ACCW(NV10_TIL2ED, ACCR(NV10_FBTIL2ED));
898 		ACCW(NV10_TIL2PT, ACCR(NV10_FBTIL2PT));
899 		ACCW(NV10_TIL2ST, ACCR(NV10_FBTIL2ST));
900 		/* tile 3: */
901 		ACCW(NV10_TIL3AD, ACCR(NV10_FBTIL3AD));
902 		ACCW(NV10_TIL3ED, ACCR(NV10_FBTIL3ED));
903 		if (si->ps.card_arch >= NV40A)
904 		{
905 			ACCW(NV10_TIL3PT, 0x2ffff800);
906 			ACCW(NV10_TIL3ST, 0x00006000);
907 		}
908 		else
909 		{
910 			ACCW(NV10_TIL3PT, ACCR(NV10_FBTIL3PT));
911 			ACCW(NV10_TIL3ST, ACCR(NV10_FBTIL3ST));
912 		}
913 		/* tile 4: */
914 		ACCW(NV10_TIL4AD, ACCR(NV10_FBTIL4AD));
915 		ACCW(NV10_TIL4ED, ACCR(NV10_FBTIL4ED));
916 		ACCW(NV10_TIL4PT, ACCR(NV10_FBTIL4PT));
917 		ACCW(NV10_TIL4ST, ACCR(NV10_FBTIL4ST));
918 		/* tile 5: */
919 		ACCW(NV10_TIL5AD, ACCR(NV10_FBTIL5AD));
920 		ACCW(NV10_TIL5ED, ACCR(NV10_FBTIL5ED));
921 		ACCW(NV10_TIL5PT, ACCR(NV10_FBTIL5PT));
922 		ACCW(NV10_TIL5ST, ACCR(NV10_FBTIL5ST));
923 		/* tile 6: */
924 		ACCW(NV10_TIL6AD, ACCR(NV10_FBTIL6AD));
925 		ACCW(NV10_TIL6ED, ACCR(NV10_FBTIL6ED));
926 		ACCW(NV10_TIL6PT, ACCR(NV10_FBTIL6PT));
927 		ACCW(NV10_TIL6ST, ACCR(NV10_FBTIL6ST));
928 		/* tile 7: */
929 		ACCW(NV10_TIL7AD, ACCR(NV10_FBTIL7AD));
930 		ACCW(NV10_TIL7ED, ACCR(NV10_FBTIL7ED));
931 		ACCW(NV10_TIL7PT, ACCR(NV10_FBTIL7PT));
932 		ACCW(NV10_TIL7ST, ACCR(NV10_FBTIL7ST));
933 
934 		if (si->ps.card_arch >= NV40A)
935 		{
936 			/* unknown.. */
937 			ACCW(NV4X_WHAT1, 0x01000000);
938 			/* engine data source DMA instance is invalid */
939 			ACCW(NV4X_DMA_SRC, 0x00000000);
940 		}
941 
942 		/* setup (clear) pipe */
943 		/* set eyetype to local, lightning is off */
944 		ACCW(NV10_XFMOD0, 0x10000000);
945 		/* disable all lights */
946 		ACCW(NV10_XFMOD1, 0x00000000);
947 
948 		ACCW(NV10_PIPEADR, 0x00000040);
949 		ACCW(NV10_PIPEDAT, 0x00000008);
950 
951 		/* note: upon writing data into the PIPEDAT register, the PIPEADR is
952 		 * probably auto-incremented! */
953 		ACCW(NV10_PIPEADR, 0x00000200);
954 		for (cnt = 0; cnt < (3 * 16); cnt++) ACCW(NV10_PIPEDAT, 0x00000000);
955 
956 		ACCW(NV10_PIPEADR, 0x00000040);
957 		ACCW(NV10_PIPEDAT, 0x00000000);
958 
959 		//fixme: this 'set' seems to hang the NV43 engine if executed:
960 		//status remains 'busy' forever in this case.
961 		if (si->ps.card_arch < NV40A)
962 		{
963 			ACCW(NV10_PIPEADR, 0x00000800);
964 			for (cnt = 0; cnt < (16 * 16); cnt++) ACCW(NV10_PIPEDAT, 0x00000000);
965 		}
966 
967 		/* turn lightning on */
968 		ACCW(NV10_XFMOD0, 0x30000000);
969 		/* set light 1 to infinite type, other lights remain off */
970 		ACCW(NV10_XFMOD1, 0x00000004);
971 
972 		ACCW(NV10_PIPEADR, 0x00006400);
973 		for (cnt = 0; cnt < (59 * 4); cnt++) ACCW(NV10_PIPEDAT, 0x00000000);
974 
975 		ACCW(NV10_PIPEADR, 0x00006800);
976 		for (cnt = 0; cnt < (47 * 4); cnt++) ACCW(NV10_PIPEDAT, 0x00000000);
977 
978 		ACCW(NV10_PIPEADR, 0x00006c00);
979 		for (cnt = 0; cnt < (3 * 4); cnt++) ACCW(NV10_PIPEDAT, 0x00000000);
980 
981 		ACCW(NV10_PIPEADR, 0x00007000);
982 		for (cnt = 0; cnt < (19 * 4); cnt++) ACCW(NV10_PIPEDAT, 0x00000000);
983 
984 		ACCW(NV10_PIPEADR, 0x00007400);
985 		for (cnt = 0; cnt < (12 * 4); cnt++) ACCW(NV10_PIPEDAT, 0x00000000);
986 
987 		ACCW(NV10_PIPEADR, 0x00007800);
988 		for (cnt = 0; cnt < (12 * 4); cnt++) ACCW(NV10_PIPEDAT, 0x00000000);
989 
990 		ACCW(NV10_PIPEADR, 0x00004400);
991 		for (cnt = 0; cnt < (8 * 4); cnt++) ACCW(NV10_PIPEDAT, 0x00000000);
992 
993 		ACCW(NV10_PIPEADR, 0x00000000);
994 		for (cnt = 0; cnt < 16; cnt++) ACCW(NV10_PIPEDAT, 0x00000000);
995 
996 		ACCW(NV10_PIPEADR, 0x00000040);
997 		for (cnt = 0; cnt < 4; cnt++) ACCW(NV10_PIPEDAT, 0x00000000);
998 	}
999 
1000 	/* setup 3D specifics */
1001 	nv_init_for_3D();
1002 
1003 	/*** setup acceleration engine command shortcuts (so via fifo) ***/
1004 	/* set object handles (b31 = 1 selects 'config' function?) */
1005 	/* note:
1006 	 * probably depending on some other setup, there are 8 or 32 FIFO channels
1007 	 * available. Assuming the current setup only has 8 channels because the 'rest'
1008 	 * isn't setup here... */
1009 	si->engine.fifo.handle[0] = NV_ROP5_SOLID;
1010 	si->engine.fifo.handle[1] = NV_IMAGE_BLACK_RECTANGLE;
1011 	si->engine.fifo.handle[2] = NV_IMAGE_PATTERN;
1012 	si->engine.fifo.handle[3] = NV4_SURFACE; /* NV10_CONTEXT_SURFACES_2D is identical */
1013 	si->engine.fifo.handle[4] = NV_IMAGE_BLIT;
1014 	si->engine.fifo.handle[5] = NV4_GDI_RECTANGLE_TEXT;
1015 	if (si->ps.card_arch < NV40A)
1016 	{
1017 		si->engine.fifo.handle[6] = NV1_RENDER_SOLID_LIN;
1018 		si->engine.fifo.handle[7] = NV4_DX5_TEXTURE_TRIANGLE;
1019 	}
1020 	/* preset no FIFO channels assigned to cmd's */
1021 	for (cnt = 0; cnt < 0x20; cnt++)
1022 	{
1023 		si->engine.fifo.ch_ptr[cnt] = 0;
1024 	}
1025 	/* set handle's pointers to their assigned FIFO channels */
1026 	for (cnt = 0; cnt < 0x08; cnt++)
1027 	{
1028 		si->engine.fifo.ch_ptr[(si->engine.fifo.handle[cnt])] =
1029 			(NVACC_FIFO + (cnt * 0x00002000));
1030 	}
1031 	/* program FIFO assignments */
1032 	ACCW(FIFO_CH0, (0x80000000 | si->engine.fifo.handle[0])); /* Raster OPeration */
1033 	ACCW(FIFO_CH1, (0x80000000 | si->engine.fifo.handle[1])); /* Clip */
1034 	ACCW(FIFO_CH2, (0x80000000 | si->engine.fifo.handle[2])); /* Pattern */
1035 	ACCW(FIFO_CH3, (0x80000000 | si->engine.fifo.handle[3])); /* 2D Surface */
1036 	ACCW(FIFO_CH4, (0x80000000 | si->engine.fifo.handle[4])); /* Blit */
1037 	ACCW(FIFO_CH5, (0x80000000 | si->engine.fifo.handle[5])); /* Bitmap */
1038 	if (si->ps.card_arch < NV40A)
1039 	{
1040 		ACCW(FIFO_CH6, (0x80000000 | si->engine.fifo.handle[6])); /* Line (not used) */
1041 		ACCW(FIFO_CH7, (0x80000000 | si->engine.fifo.handle[7])); /* Textured Triangle (3D only) */
1042 	}
1043 
1044 	/* initialize our local pointers */
1045 	nv_acc_assert_fifo();
1046 
1047 	/* do first actual acceleration engine command:
1048 	 * setup clipping region (workspace size) to 32768 x 32768 pixels:
1049 	 * wait for room in fifo for clipping cmd if needed.
1050 	 * (fifo holds 256 32bit words: count those, not bytes) */
1051 	while (((nv_image_black_rectangle_ptr->FifoFree) >> 2) < 2)
1052 	{
1053 		/* snooze a bit so I do not hammer the bus */
1054 		snooze (10);
1055 	}
1056 	/* now setup clipping (writing 2 32bit words) */
1057 	nv_image_black_rectangle_ptr->TopLeft = 0x00000000;
1058 	nv_image_black_rectangle_ptr->HeightWidth = 0x80008000;
1059 
1060 	return B_OK;
1061 }
1062 
1063 static void nv_init_for_3D(void)
1064 {
1065 	/* setup PGRAPH unknown registers and modify (pre-cleared) pipe stuff for 3D use */
1066 	if (si->ps.card_arch >= NV10A)
1067 	{
1068 		/* setup unknown PGRAPH stuff */
1069 		ACCW(PGWHAT_00, 0x00000000);
1070 		ACCW(PGWHAT_01, 0x00000000);
1071 		ACCW(PGWHAT_02, 0x00000000);
1072 		ACCW(PGWHAT_03, 0x00000000);
1073 
1074 		ACCW(PGWHAT_04, 0x00001000);
1075 		ACCW(PGWHAT_05, 0x00001000);
1076 		ACCW(PGWHAT_06, 0x4003ff80);
1077 
1078 		ACCW(PGWHAT_07, 0x00000000);
1079 		ACCW(PGWHAT_08, 0x00000000);
1080 		ACCW(PGWHAT_09, 0x00000000);
1081 		ACCW(PGWHAT_0A, 0x00000000);
1082 		ACCW(PGWHAT_0B, 0x00000000);
1083 
1084 		ACCW(PGWHAT_0C, 0x00080008);
1085 		ACCW(PGWHAT_0D, 0x00080008);
1086 
1087 		ACCW(PGWHAT_0E, 0x00000000);
1088 		ACCW(PGWHAT_0F, 0x00000000);
1089 		ACCW(PGWHAT_10, 0x00000000);
1090 		ACCW(PGWHAT_11, 0x00000000);
1091 		ACCW(PGWHAT_12, 0x00000000);
1092 		ACCW(PGWHAT_13, 0x00000000);
1093 		ACCW(PGWHAT_14, 0x00000000);
1094 		ACCW(PGWHAT_15, 0x00000000);
1095 		ACCW(PGWHAT_16, 0x00000000);
1096 		ACCW(PGWHAT_17, 0x00000000);
1097 		ACCW(PGWHAT_18, 0x00000000);
1098 
1099 		ACCW(PGWHAT_19, 0x10000000);
1100 
1101 		ACCW(PGWHAT_1A, 0x00000000);
1102 		ACCW(PGWHAT_1B, 0x00000000);
1103 		ACCW(PGWHAT_1C, 0x00000000);
1104 		ACCW(PGWHAT_1D, 0x00000000);
1105 		ACCW(PGWHAT_1E, 0x00000000);
1106 		ACCW(PGWHAT_1F, 0x00000000);
1107 		ACCW(PGWHAT_20, 0x00000000);
1108 		ACCW(PGWHAT_21, 0x00000000);
1109 
1110 		ACCW(PGWHAT_22, 0x08000000);
1111 
1112 		ACCW(PGWHAT_23, 0x00000000);
1113 		ACCW(PGWHAT_24, 0x00000000);
1114 		ACCW(PGWHAT_25, 0x00000000);
1115 		ACCW(PGWHAT_26, 0x00000000);
1116 
1117 		ACCW(PGWHAT_27, 0x4b7fffff);
1118 
1119 		ACCW(PGWHAT_28, 0x00000000);
1120 		ACCW(PGWHAT_29, 0x00000000);
1121 		ACCW(PGWHAT_2A, 0x00000000);
1122 
1123 		/* setup window clipping */
1124 		/* b0-11 = min; b16-27 = max.
1125 		 * note:
1126 		 * probably two's complement values, so setting to max range here:
1127 		 * which would be -2048 upto/including +2047. */
1128 		/* horizontal */
1129 		ACCW(WINCLIP_H_0, 0x07ff0800);
1130 		ACCW(WINCLIP_H_1, 0x07ff0800);
1131 		ACCW(WINCLIP_H_2, 0x07ff0800);
1132 		ACCW(WINCLIP_H_3, 0x07ff0800);
1133 		ACCW(WINCLIP_H_4, 0x07ff0800);
1134 		ACCW(WINCLIP_H_5, 0x07ff0800);
1135 		ACCW(WINCLIP_H_6, 0x07ff0800);
1136 		ACCW(WINCLIP_H_7, 0x07ff0800);
1137 		/* vertical */
1138 		ACCW(WINCLIP_V_0, 0x07ff0800);
1139 		ACCW(WINCLIP_V_1, 0x07ff0800);
1140 		ACCW(WINCLIP_V_2, 0x07ff0800);
1141 		ACCW(WINCLIP_V_3, 0x07ff0800);
1142 		ACCW(WINCLIP_V_4, 0x07ff0800);
1143 		ACCW(WINCLIP_V_5, 0x07ff0800);
1144 		ACCW(WINCLIP_V_6, 0x07ff0800);
1145 		ACCW(WINCLIP_V_7, 0x07ff0800);
1146 
1147 		/* setup (initialize) pipe */
1148 		/* set eyetype to local, lightning etc. is off */
1149 		ACCW(NV10_XFMOD0, 0x10000000);
1150 		/* disable all lights */
1151 		ACCW(NV10_XFMOD1, 0x00000000);
1152 
1153 		/* note: upon writing data into the PIPEDAT register, the PIPEADR is
1154 		 * probably auto-incremented! */
1155 		/* (pipe adress = b2-16, pipe data = b0-31) */
1156 		ACCW(NV10_PIPEADR, 0x00006740);
1157 		ACCW(NV10_PIPEDAT, 0x00000000);
1158 		ACCW(NV10_PIPEDAT, 0x00000000);
1159 		ACCW(NV10_PIPEDAT, 0x00000000);
1160 		ACCW(NV10_PIPEDAT, 0x3f800000);
1161 
1162 		ACCW(NV10_PIPEADR, 0x00006750);
1163 		ACCW(NV10_PIPEDAT, 0x40000000);
1164 		ACCW(NV10_PIPEDAT, 0x40000000);
1165 		ACCW(NV10_PIPEDAT, 0x40000000);
1166 		ACCW(NV10_PIPEDAT, 0x40000000);
1167 
1168 		ACCW(NV10_PIPEADR, 0x00006760);
1169 		ACCW(NV10_PIPEDAT, 0x00000000);
1170 		ACCW(NV10_PIPEDAT, 0x00000000);
1171 		ACCW(NV10_PIPEDAT, 0x3f800000);
1172 		ACCW(NV10_PIPEDAT, 0x00000000);
1173 
1174 		ACCW(NV10_PIPEADR, 0x00006770);
1175 		ACCW(NV10_PIPEDAT, 0xc5000000);
1176 		ACCW(NV10_PIPEDAT, 0xc5000000);
1177 		ACCW(NV10_PIPEDAT, 0x00000000);
1178 		ACCW(NV10_PIPEDAT, 0x00000000);
1179 
1180 		ACCW(NV10_PIPEADR, 0x00006780);
1181 		ACCW(NV10_PIPEDAT, 0x00000000);
1182 		ACCW(NV10_PIPEDAT, 0x00000000);
1183 		ACCW(NV10_PIPEDAT, 0x3f800000);
1184 		ACCW(NV10_PIPEDAT, 0x00000000);
1185 
1186 		ACCW(NV10_PIPEADR, 0x000067a0);
1187 		ACCW(NV10_PIPEDAT, 0x3f800000);
1188 		ACCW(NV10_PIPEDAT, 0x3f800000);
1189 		ACCW(NV10_PIPEDAT, 0x3f800000);
1190 		ACCW(NV10_PIPEDAT, 0x3f800000);
1191 
1192 		ACCW(NV10_PIPEADR, 0x00006ab0);
1193 		ACCW(NV10_PIPEDAT, 0x3f800000);
1194 		ACCW(NV10_PIPEDAT, 0x3f800000);
1195 		ACCW(NV10_PIPEDAT, 0x3f800000);
1196 
1197 		ACCW(NV10_PIPEADR, 0x00006ac0);
1198 		ACCW(NV10_PIPEDAT, 0x00000000);
1199 		ACCW(NV10_PIPEDAT, 0x00000000);
1200 		ACCW(NV10_PIPEDAT, 0x00000000);
1201 
1202 		ACCW(NV10_PIPEADR, 0x00006c10);
1203 		ACCW(NV10_PIPEDAT, 0xbf800000);
1204 
1205 		ACCW(NV10_PIPEADR, 0x00007030);
1206 		ACCW(NV10_PIPEDAT, 0x7149f2ca);
1207 
1208 		ACCW(NV10_PIPEADR, 0x00007040);
1209 		ACCW(NV10_PIPEDAT, 0x7149f2ca);
1210 
1211 		ACCW(NV10_PIPEADR, 0x00007050);
1212 		ACCW(NV10_PIPEDAT, 0x7149f2ca);
1213 
1214 		ACCW(NV10_PIPEADR, 0x00007060);
1215 		ACCW(NV10_PIPEDAT, 0x7149f2ca);
1216 
1217 		ACCW(NV10_PIPEADR, 0x00007070);
1218 		ACCW(NV10_PIPEDAT, 0x7149f2ca);
1219 
1220 		ACCW(NV10_PIPEADR, 0x00007080);
1221 		ACCW(NV10_PIPEDAT, 0x7149f2ca);
1222 
1223 		ACCW(NV10_PIPEADR, 0x00007090);
1224 		ACCW(NV10_PIPEDAT, 0x7149f2ca);
1225 
1226 		ACCW(NV10_PIPEADR, 0x000070a0);
1227 		ACCW(NV10_PIPEDAT, 0x7149f2ca);
1228 
1229 		ACCW(NV10_PIPEADR, 0x00006a80);
1230 		ACCW(NV10_PIPEDAT, 0x00000000);
1231 		ACCW(NV10_PIPEDAT, 0x00000000);
1232 		ACCW(NV10_PIPEDAT, 0x3f800000);
1233 
1234 		ACCW(NV10_PIPEADR, 0x00006aa0);
1235 		ACCW(NV10_PIPEDAT, 0x00000000);
1236 		ACCW(NV10_PIPEDAT, 0x00000000);
1237 		ACCW(NV10_PIPEDAT, 0x00000000);
1238 
1239 		ACCW(NV10_PIPEADR, 0x00000040);
1240 		ACCW(NV10_PIPEDAT, 0x00000005);
1241 
1242 		ACCW(NV10_PIPEADR, 0x00006400);
1243 		ACCW(NV10_PIPEDAT, 0x3f800000);
1244 		ACCW(NV10_PIPEDAT, 0x3f800000);
1245 		ACCW(NV10_PIPEDAT, 0x4b7fffff);
1246 		ACCW(NV10_PIPEDAT, 0x00000000);
1247 
1248 		ACCW(NV10_PIPEADR, 0x00006410);
1249 		ACCW(NV10_PIPEDAT, 0xc5000000);
1250 		ACCW(NV10_PIPEDAT, 0xc5000000);
1251 		ACCW(NV10_PIPEDAT, 0x00000000);
1252 		ACCW(NV10_PIPEDAT, 0x00000000);
1253 
1254 		ACCW(NV10_PIPEADR, 0x00006420);
1255 		ACCW(NV10_PIPEDAT, 0x00000000);
1256 		ACCW(NV10_PIPEDAT, 0x00000000);
1257 		ACCW(NV10_PIPEDAT, 0x00000000);
1258 		ACCW(NV10_PIPEDAT, 0x00000000);
1259 
1260 		ACCW(NV10_PIPEADR, 0x00006430);
1261 		ACCW(NV10_PIPEDAT, 0x00000000);
1262 		ACCW(NV10_PIPEDAT, 0x00000000);
1263 		ACCW(NV10_PIPEDAT, 0x00000000);
1264 		ACCW(NV10_PIPEDAT, 0x00000000);
1265 
1266 		ACCW(NV10_PIPEADR, 0x000064c0);
1267 		ACCW(NV10_PIPEDAT, 0x3f800000);
1268 		ACCW(NV10_PIPEDAT, 0x3f800000);
1269 		ACCW(NV10_PIPEDAT, 0x477fffff);
1270 		ACCW(NV10_PIPEDAT, 0x3f800000);
1271 
1272 		ACCW(NV10_PIPEADR, 0x000064d0);
1273 		ACCW(NV10_PIPEDAT, 0xc5000000);
1274 		ACCW(NV10_PIPEDAT, 0xc5000000);
1275 		ACCW(NV10_PIPEDAT, 0x00000000);
1276 		ACCW(NV10_PIPEDAT, 0x00000000);
1277 
1278 		ACCW(NV10_PIPEADR, 0x000064e0);
1279 		ACCW(NV10_PIPEDAT, 0xc4fff000);
1280 		ACCW(NV10_PIPEDAT, 0xc4fff000);
1281 		ACCW(NV10_PIPEDAT, 0x00000000);
1282 		ACCW(NV10_PIPEDAT, 0x00000000);
1283 
1284 		ACCW(NV10_PIPEADR, 0x000064f0);
1285 		ACCW(NV10_PIPEDAT, 0x00000000);
1286 		ACCW(NV10_PIPEDAT, 0x00000000);
1287 		ACCW(NV10_PIPEDAT, 0x00000000);
1288 		ACCW(NV10_PIPEDAT, 0x00000000);
1289 
1290 		/* turn lightning on */
1291 		ACCW(NV10_XFMOD0, 0x30000000);
1292 		/* set light 1 to infinite type, other lights remain off */
1293 		ACCW(NV10_XFMOD1, 0x00000004);
1294 
1295 		/* Z-buffer state is:
1296 		 * initialized, set to: 'fixed point' (integer?); Z-buffer; 16bits depth */
1297 		/* note:
1298 		 * other options possible are: floating point; 24bits depth; W-buffer(?) */
1299 		ACCW(GLOB_STAT_0, 0x10000000);
1300 		/* set DMA instance 2 and 3 to be invalid */
1301 		ACCW(GLOB_STAT_1, 0x00000000);
1302 	}
1303 }
1304 
1305 /* fixme? (check this out..)
1306  * Looks like this stuff can be very much simplified and speed-up, as it seems it's not
1307  * nessesary to wait for the engine to become idle before re-assigning channels.
1308  * Because the cmd handles are actually programmed _inside_ the fifo channels, it might
1309  * well be that the assignment is buffered along with the commands that still have to
1310  * be executed!
1311  * (sounds very plausible to me :) */
1312 void nv_acc_assert_fifo(void)
1313 {
1314 	/* does every engine cmd this accelerant needs have a FIFO channel? */
1315 	//fixme: can probably be optimized for both speed and channel selection...
1316 	if (!si->engine.fifo.ch_ptr[NV_ROP5_SOLID] ||
1317 		!si->engine.fifo.ch_ptr[NV_IMAGE_BLACK_RECTANGLE] ||
1318 		!si->engine.fifo.ch_ptr[NV_IMAGE_PATTERN] ||
1319 		!si->engine.fifo.ch_ptr[NV_IMAGE_BLIT] ||
1320 		!si->engine.fifo.ch_ptr[NV4_GDI_RECTANGLE_TEXT])
1321 	{
1322 		uint16 cnt;
1323 
1324 		/* no, wait until the engine is idle before re-assigning the FIFO */
1325 		nv_acc_wait_idle();
1326 
1327 		/* free the FIFO channels we want from the currently assigned cmd's */
1328 		si->engine.fifo.ch_ptr[si->engine.fifo.handle[0]] = 0;
1329 		si->engine.fifo.ch_ptr[si->engine.fifo.handle[1]] = 0;
1330 		si->engine.fifo.ch_ptr[si->engine.fifo.handle[2]] = 0;
1331 		si->engine.fifo.ch_ptr[si->engine.fifo.handle[4]] = 0;
1332 		si->engine.fifo.ch_ptr[si->engine.fifo.handle[5]] = 0;
1333 
1334 		/* set new object handles */
1335 		si->engine.fifo.handle[0] = NV_ROP5_SOLID;
1336 		si->engine.fifo.handle[1] = NV_IMAGE_BLACK_RECTANGLE;
1337 		si->engine.fifo.handle[2] = NV_IMAGE_PATTERN;
1338 		si->engine.fifo.handle[4] = NV_IMAGE_BLIT;
1339 		si->engine.fifo.handle[5] = NV4_GDI_RECTANGLE_TEXT;
1340 
1341 		/* set handle's pointers to their assigned FIFO channels */
1342 		for (cnt = 0; cnt < 0x08; cnt++)
1343 		{
1344 			si->engine.fifo.ch_ptr[(si->engine.fifo.handle[cnt])] =
1345 				(NVACC_FIFO + (cnt * 0x00002000));
1346 		}
1347 
1348 		/* program new FIFO assignments */
1349 		ACCW(FIFO_CH0, (0x80000000 | si->engine.fifo.handle[0])); /* Raster OPeration */
1350 		ACCW(FIFO_CH1, (0x80000000 | si->engine.fifo.handle[1])); /* Clip */
1351 		ACCW(FIFO_CH2, (0x80000000 | si->engine.fifo.handle[2])); /* Pattern */
1352 		ACCW(FIFO_CH4, (0x80000000 | si->engine.fifo.handle[4])); /* Blit */
1353 		ACCW(FIFO_CH5, (0x80000000 | si->engine.fifo.handle[5])); /* Bitmap */
1354 	}
1355 
1356 	/* update our local pointers */
1357 	nv_rop5_solid_ptr = (cmd_nv_rop5_solid*)
1358 		&(regs[(si->engine.fifo.ch_ptr[NV_ROP5_SOLID]) >> 2]);
1359 
1360 	nv_image_black_rectangle_ptr = (cmd_nv_image_black_rectangle*)
1361 		&(regs[(si->engine.fifo.ch_ptr[NV_IMAGE_BLACK_RECTANGLE]) >> 2]);
1362 
1363 	nv_image_pattern_ptr = (cmd_nv_image_pattern*)
1364 		&(regs[(si->engine.fifo.ch_ptr[NV_IMAGE_PATTERN]) >> 2]);
1365 
1366 	nv_image_blit_ptr = (cmd_nv_image_blit*)
1367 		&(regs[(si->engine.fifo.ch_ptr[NV_IMAGE_BLIT]) >> 2]);
1368 
1369 	nv3_gdi_rectangle_text_ptr = (cmd_nv3_gdi_rectangle_text*)
1370 		&(regs[(si->engine.fifo.ch_ptr[NV4_GDI_RECTANGLE_TEXT]) >> 2]);
1371 }
1372 
1373 /* screen to screen blit - i.e. move windows around and scroll within them. */
1374 status_t nv_acc_setup_blit()
1375 {
1376 	/* setup solid pattern:
1377 	 * wait for room in fifo for pattern cmd if needed.
1378 	 * (fifo holds 256 32bit words: count those, not bytes) */
1379 	while (((nv_image_pattern_ptr->FifoFree) >> 2) < 5)
1380 	{
1381 		/* snooze a bit so I do not hammer the bus */
1382 		snooze (10);
1383 	}
1384 	/* now setup pattern (writing 5 32bit words) */
1385 	nv_image_pattern_ptr->SetShape = 0x00000000; /* 0 = 8x8, 1 = 64x1, 2 = 1x64 */
1386 	nv_image_pattern_ptr->SetColor0 = 0xffffffff;
1387 	nv_image_pattern_ptr->SetColor1 = 0xffffffff;
1388 	nv_image_pattern_ptr->SetPattern[0] = 0xffffffff;
1389 	nv_image_pattern_ptr->SetPattern[1] = 0xffffffff;
1390 
1391 	/* ROP registers (Raster OPeration):
1392 	 * wait for room in fifo for ROP cmd if needed.
1393 	 * (fifo holds 256 32bit words: count those, not bytes) */
1394 	while (((nv_rop5_solid_ptr->FifoFree) >> 2) < 1)
1395 	{
1396 		/* snooze a bit so I do not hammer the bus */
1397 		snooze (10);
1398 	}
1399 	/* now setup ROP (writing 1 32bit word) */
1400 	nv_rop5_solid_ptr->SetRop5 = 0xcc;
1401 
1402 	return B_OK;
1403 }
1404 
1405 status_t nv_acc_blit(uint16 xs,uint16 ys,uint16 xd,uint16 yd,uint16 w,uint16 h)
1406 {
1407 	/* Note: blit-copy direction is determined inside riva hardware: no setup needed */
1408 
1409 	/* instruct engine what to blit:
1410 	 * wait for room in fifo for blit cmd if needed.
1411 	 * (fifo holds 256 32bit words: count those, not bytes) */
1412 	while (((nv_image_blit_ptr->FifoFree) >> 2) < 3)
1413 	{
1414 		/* snooze a bit so I do not hammer the bus */
1415 		snooze (10);
1416 	}
1417 	/* now setup blit (writing 3 32bit words) */
1418 	nv_image_blit_ptr->SourceOrg = ((ys << 16) | xs);
1419 	nv_image_blit_ptr->DestOrg = ((yd << 16) | xd);
1420 	nv_image_blit_ptr->HeightWidth = (((h + 1) << 16) | (w + 1));
1421 
1422 	return B_OK;
1423 }
1424 
1425 /* rectangle fill - i.e. workspace and window background color */
1426 /* span fill - i.e. (selected) menuitem background color (Dano) */
1427 status_t nv_acc_setup_rectangle(uint32 color)
1428 {
1429 	/* setup solid pattern:
1430 	 * wait for room in fifo for pattern cmd if needed.
1431 	 * (fifo holds 256 32bit words: count those, not bytes) */
1432 	while (((nv_image_pattern_ptr->FifoFree) >> 2) < 5)
1433 	{
1434 		/* snooze a bit so I do not hammer the bus */
1435 		snooze (10);
1436 	}
1437 	/* now setup pattern (writing 5 32bit words) */
1438 	nv_image_pattern_ptr->SetShape = 0x00000000; /* 0 = 8x8, 1 = 64x1, 2 = 1x64 */
1439 	nv_image_pattern_ptr->SetColor0 = 0xffffffff;
1440 	nv_image_pattern_ptr->SetColor1 = 0xffffffff;
1441 	nv_image_pattern_ptr->SetPattern[0] = 0xffffffff;
1442 	nv_image_pattern_ptr->SetPattern[1] = 0xffffffff;
1443 
1444 	/* ROP registers (Raster OPeration):
1445 	 * wait for room in fifo for ROP cmd if needed.
1446 	 * (fifo holds 256 32bit words: count those, not bytes) */
1447 	while (((nv_rop5_solid_ptr->FifoFree) >> 2) < 1)
1448 	{
1449 		/* snooze a bit so I do not hammer the bus */
1450 		snooze (10);
1451 	}
1452 	/* now setup ROP (writing 1 32bit word) for GXcopy */
1453 	nv_rop5_solid_ptr->SetRop5 = 0xcc;
1454 
1455 	/* setup fill color:
1456 	 * wait for room in fifo for bitmap cmd if needed.
1457 	 * (fifo holds 256 32bit words: count those, not bytes) */
1458 	while (((nv3_gdi_rectangle_text_ptr->FifoFree) >> 2) < 1)
1459 	{
1460 		/* snooze a bit so I do not hammer the bus */
1461 		snooze (10);
1462 	}
1463 	/* now setup color (writing 1 32bit word) */
1464 	nv3_gdi_rectangle_text_ptr->Color1A = color;
1465 
1466 	return B_OK;
1467 }
1468 
1469 status_t nv_acc_rectangle(uint32 xs,uint32 xe,uint32 ys,uint32 yl)
1470 {
1471 	/* instruct engine what to fill:
1472 	 * wait for room in fifo for bitmap cmd if needed.
1473 	 * (fifo holds 256 32bit words: count those, not bytes) */
1474 	while (((nv3_gdi_rectangle_text_ptr->FifoFree) >> 2) < 2)
1475 	{
1476 		/* snooze a bit so I do not hammer the bus */
1477 		snooze (10);
1478 	}
1479 	/* now setup fill (writing 2 32bit words) */
1480 	nv3_gdi_rectangle_text_ptr->UnclippedRectangle[0].LeftTop =
1481 		((xs << 16) | (ys & 0x0000ffff));
1482 	nv3_gdi_rectangle_text_ptr->UnclippedRectangle[0].WidthHeight =
1483 		(((xe - xs) << 16) | (yl & 0x0000ffff));
1484 
1485 	return B_OK;
1486 }
1487 
1488 /* rectangle invert - i.e. text cursor and text selection */
1489 status_t nv_acc_setup_rect_invert()
1490 {
1491 	/* setup solid pattern:
1492 	 * wait for room in fifo for pattern cmd if needed.
1493 	 * (fifo holds 256 32bit words: count those, not bytes) */
1494 	while (((nv_image_pattern_ptr->FifoFree) >> 2) < 5)
1495 	{
1496 		/* snooze a bit so I do not hammer the bus */
1497 		snooze (10);
1498 	}
1499 	/* now setup pattern (writing 5 32bit words) */
1500 	nv_image_pattern_ptr->SetShape = 0x00000000; /* 0 = 8x8, 1 = 64x1, 2 = 1x64 */
1501 	nv_image_pattern_ptr->SetColor0 = 0xffffffff;
1502 	nv_image_pattern_ptr->SetColor1 = 0xffffffff;
1503 	nv_image_pattern_ptr->SetPattern[0] = 0xffffffff;
1504 	nv_image_pattern_ptr->SetPattern[1] = 0xffffffff;
1505 
1506 	/* ROP registers (Raster OPeration):
1507 	 * wait for room in fifo for ROP cmd if needed.
1508 	 * (fifo holds 256 32bit words: count those, not bytes) */
1509 	while (((nv_rop5_solid_ptr->FifoFree) >> 2) < 1)
1510 	{
1511 		/* snooze a bit so I do not hammer the bus */
1512 		snooze (10);
1513 	}
1514 	/* now setup ROP (writing 1 32bit word) for GXinvert */
1515 	nv_rop5_solid_ptr->SetRop5 = 0x55;
1516 
1517 	/* reset fill color:
1518 	 * wait for room in fifo for bitmap cmd if needed.
1519 	 * (fifo holds 256 32bit words: count those, not bytes) */
1520 	while (((nv3_gdi_rectangle_text_ptr->FifoFree) >> 2) < 1)
1521 	{
1522 		/* snooze a bit so I do not hammer the bus */
1523 		snooze (10);
1524 	}
1525 	/* now reset color (writing 1 32bit word) */
1526 	nv3_gdi_rectangle_text_ptr->Color1A = 0x00000000;
1527 
1528 	return B_OK;
1529 }
1530 
1531 status_t nv_acc_rectangle_invert(uint32 xs,uint32 xe,uint32 ys,uint32 yl)
1532 {
1533 	/* instruct engine what to invert:
1534 	 * wait for room in fifo for bitmap cmd if needed.
1535 	 * (fifo holds 256 32bit words: count those, not bytes) */
1536 	while (((nv3_gdi_rectangle_text_ptr->FifoFree) >> 2) < 2)
1537 	{
1538 		/* snooze a bit so I do not hammer the bus */
1539 		snooze (10);
1540 	}
1541 	/* now setup invert (writing 2 32bit words) */
1542 	nv3_gdi_rectangle_text_ptr->UnclippedRectangle[0].LeftTop =
1543 		((xs << 16) | (ys & 0x0000ffff));
1544 	nv3_gdi_rectangle_text_ptr->UnclippedRectangle[0].WidthHeight =
1545 		(((xe - xs) << 16) | (yl & 0x0000ffff));
1546 
1547 	return B_OK;
1548 }
1549 
1550 /* screen to screen tranparent blit */
1551 status_t nv_acc_transparent_blit(uint16 xs,uint16 ys,uint16 xd,uint16 yd,uint16 w,uint16 h,uint32 colour)
1552 {
1553 	//fixme: implement.
1554 
1555 	return B_ERROR;
1556 }
1557 
1558 /* screen to screen scaled filtered blit - i.e. scale video in memory */
1559 status_t nv_acc_video_blit(uint16 xs,uint16 ys,uint16 ws, uint16 hs,
1560 	uint16 xd,uint16 yd,uint16 wd,uint16 hd)
1561 {
1562 	//fixme: implement.
1563 
1564 	return B_ERROR;
1565 }
1566