xref: /haiku/src/add-ons/accelerants/matrox/engine/mga_acc.c (revision 4f00613311d0bd6b70fa82ce19931c41f071ea4e)
1 /* MGA Acceleration functions */
2 /* Authors:
3    Mark Watson 2/2000,
4    Rudolf Cornelissen 10/2002-1/2004.
5 */
6 
7 #define MODULE_BIT 0x00080000
8 
9 #include "mga_std.h"
10 
11 /*acceleration notes*/
12 
13 /*functions Be's app_server uses:
14 fill span (horizontal only)
15 fill rectangle (these 2 are very similar)
16 invert rectangle
17 blit
18 */
19 
/* Program the destination start line and the number of lines to draw.
 * Needed by MIL 1/2 because of address linearisation constraints:
 * - without software linearisation (si->engine.y_lin == 0) both values go
 *   into the combined YDSTLEN register, 'dst' in the upper 16 bits;
 * - with software linearisation 'dst' is first multiplied by the number of
 *   pixels per row and divided by 32, then written to YDST, while 'len' is
 *   written to LEN separately. */
#define ACCW_YDSTLEN(dst, len) do { \
	if (!si->engine.y_lin) { \
		ACCW(YDSTLEN,((dst)<<16)|(len)); \
	} else { \
		ACCW(YDST,((dst)* (si->fbc.bytes_per_row / (si->engine.depth >> 3))) >> 5); \
		ACCW(LEN,len); \
	} \
} while (0)
27 
28 status_t gx00_acc_wait_idle()
29 {
30 	/* wait until engine completely idle */
31 	while (ACCR(STATUS) & 0x00010000)
32 	{
33 		/* snooze a bit so I do not hammer the bus */
34 		snooze (100);
35 	}
36 
37 	return B_OK;
38 }
39 
40 /* AFAIK this must be done for every new screenmode.
41  * Engine required init. */
42 status_t gx00_acc_init()
43 {
44 	/* used for convenience: MACCESS is a write only register! */
45 	uint32 maccess = 0x00000000;
46 	/* if we were unable to read PINS, we have to assume something (keeping bit6 zero) */
47 	if ((si->ps.card_type >= G450) && (si->ps.pins_status = B_OK))
48 	{
49 		/* b7 v5_mem_type = done by Mark Watson. fixme: still confirm! (unknown bits) */
50 		maccess |= ((((uint32)si->ps.v5_mem_type) & 0x80) >> 1);
51 	}
52 
53 	/* preset using hardware adress linearisation */
54 	si->engine.y_lin = 0x00;
55 	/* reset depth */
56 	si->engine.depth = 0;
57 
58 	/* cleanup bitblt */
59 	ACCW(OPMODE,0);
60 
61 	/* Set the Z origin to the start of FB (otherwise lockup on blits) */
62 	ACCW(ZORG,0);
63 
64 	/* Set pixel width */
65 	switch(si->dm.space)
66 	{
67 	case B_CMAP8:
68 		ACCW(MACCESS, ((maccess & 0xfffffffc) | 0x00));
69 		si->engine.depth = 8;
70 		break;
71 	case B_RGB15_LITTLE:case B_RGB16_LITTLE:
72 		ACCW(MACCESS, ((maccess & 0xfffffffc) | 0x01));
73 		si->engine.depth = 16;
74 		break;
75 	case B_RGB32_LITTLE:case B_RGBA32_LITTLE:
76 		ACCW(MACCESS, ((maccess & 0xfffffffc) | 0x02));
77 		si->engine.depth = 32;
78 		break;
79 	default:
80 		LOG(8,("ACC: init, invalid bit depth\n"));
81 		return B_ERROR;
82 	}
83 
84 	/* setup PITCH: very cardtype specific! */
85 	switch (si->ps.card_type)
86 	{
87 	case MIL1:
88 		switch (si->fbc.bytes_per_row / (si->engine.depth >> 3))
89 		{
90 			case 640:
91 			case 768:
92 			case 800:
93 			case 960:
94 			case 1024:
95 			case 1152:
96 			case 1280:
97 			case 1600:
98 			case 1920:
99 			case 2048:
100 				/* we are using hardware adress linearisation */
101 				break;
102 			default:
103 				/* we are using software adress linearisation */
104 				si->engine.y_lin = 0x01;
105 				LOG(8,("ACC: using software adress linearisation\n"));
106 				break;
107 		}
108 		ACCW(PITCH, (si->engine.y_lin << 15) |
109 					((si->fbc.bytes_per_row / (si->engine.depth >> 3)) & 0x0FFF));
110 		break;
111 	case MIL2:
112 		switch (si->fbc.bytes_per_row / (si->engine.depth >> 3))
113 		{
114 			case 512:
115 			case 640:
116 			case 768:
117 			case 800:
118 			case 832:
119 			case 960:
120 			case 1024:
121 			case 1152:
122 			case 1280:
123 			case 1600:
124 			case 1664:
125 			case 1920:
126 			case 2048:
127 				/* we are using hardware adress linearisation */
128 				break;
129 			default:
130 				/* we are using software adress linearisation */
131 				si->engine.y_lin = 0x01;
132 				LOG(8,("ACC: using software adress linearisation\n"));
133 				break;
134 		}
135 		ACCW(PITCH, (si->engine.y_lin << 15) |
136 					((si->fbc.bytes_per_row / (si->engine.depth >> 3)) & 0x0FFF));
137 		break;
138 	case G100:
139 		/* always using hardware adress linearisation, because 2D/3D
140 		 * engine works on every pitch multiple of 32 */
141 		ACCW(PITCH, ((si->fbc.bytes_per_row / (si->engine.depth >> 3)) & 0x0FFF));
142 		break;
143 	default:
144 		/* G200 and up are equal.. */
145 		/* always using hardware adress linearisation, because 2D/3D
146 		 * engine works on every pitch multiple of 32 */
147 		ACCW(PITCH, ((si->fbc.bytes_per_row / (si->engine.depth >> 3)) & 0x1FFF));
148 		break;
149 	}
150 
151 	/* disable plane write mask (needed for SDRAM): actual change needed to get it sent to RAM */
152 	ACCW(PLNWT,0x00000000);
153 	ACCW(PLNWT,0xffffffff);
154 
155 	if (si->ps.card_type >= G200) {
156 		/*DSTORG - location of active screen in framebuffer*/
157 		ACCW(DSTORG,((uint8*)si->fbc.frame_buffer) - ((uint8*)si->framebuffer));
158 
159 		/*SRCORG - init source address - same as dest*/
160 		ACCW(SRCORG,((uint8*)si->fbc.frame_buffer) - ((uint8*)si->framebuffer));
161 	}
162 
163 	/* init YDSTORG - apsed, if not inited, BitBlts may fails on <= G200 */
164 	si->engine.src_dst = 0;
165 	ACCW(YDSTORG, si->engine.src_dst);
166 
167 	/* <= G100 uses this register as SRCORG/DSTORG replacement, but
168 	 * MIL 1/2 does not need framebuffer space for the hardcursor! */
169 	if ((si->ps.card_type == G100) && (si->settings.hardcursor))
170 	{
171 		switch (si->dm.space)
172 		{
173 			case B_CMAP8:
174 				si->engine.src_dst = 1024 / 1;
175 				break;
176 			case B_RGB15_LITTLE:
177 			case B_RGB16_LITTLE:
178 				si->engine.src_dst = 1024 / 2;
179 				break;
180 			case B_RGB32_LITTLE:
181 				si->engine.src_dst =  1024 / 4;
182 				break;
183 			default:
184 				LOG(8,("ACC: G100 hardcursor not supported for current colorspace\n"));
185 				return B_ERROR;
186 		}
187 	}
188 	ACCW(YDSTORG, si->engine.src_dst);
189 
190 	/* clipping */
191 	/* i.e. highest and lowest X pixel adresses */
192 	ACCW(CXBNDRY,(((si->fbc.bytes_per_row / (si->engine.depth >> 3)) - 1) << 16) | (0));
193 
194 	/* Y pixel addresses must be linear */
195 	/* lowest adress */
196 	ACCW(YTOP, 0 + si->engine.src_dst);
197 	/* highest adress */
198 	ACCW(YBOT,((si->dm.virtual_height - 1) *
199 		(si->fbc.bytes_per_row / (si->engine.depth >> 3))) + si->engine.src_dst);
200 
201 	return B_OK;
202 }
203 
204 /* screen to screen blit - i.e. move windows around.
205  * Engine function bitblit, paragraph 4.5.7.2 */
206 status_t gx00_acc_blit(uint16 xs,uint16 ys,uint16 xd,uint16 yd,uint16 w,uint16 h)
207 {
208 	uint32 t_start,t_end,offset;
209 	uint32 b_start,b_end;
210 
211 	/*find where the top,bottom and offset are*/
212 	offset = (si->fbc.bytes_per_row / (si->engine.depth >> 3));
213 
214 	t_end = t_start = xs + (offset*ys) + si->engine.src_dst;
215 	t_end += w;
216 
217 	b_end = b_start = xs + (offset*(ys+h)) + si->engine.src_dst;
218 	b_end +=w;
219 
220 	/* sgnzero bit _must_ be '0' before accessing SGN! */
221 	ACCW(DWGCTL,0x00000000);
222 
223 	/*find which quadrant */
224 	switch((yd>ys)|((xd>xs)<<1))
225 	{
226 	case 0: /*L->R,down*/
227 		ACCW(SGN,0);
228 
229 		ACCW(AR3,t_start);
230 		ACCW(AR0,t_end);
231 		ACCW(AR5,offset);
232 
233 		ACCW_YDSTLEN(yd,h+1);
234 		break;
235 	case 1: /*L->R,up*/
236 		ACCW(SGN,4);
237 
238 		ACCW(AR3,b_start);
239 		ACCW(AR0,b_end);
240 		ACCW(AR5,-offset);
241 
242 		ACCW_YDSTLEN(yd+h,h+1);
243 		break;
244 	case 2: /*R->L,down*/
245 		ACCW(SGN,1);
246 
247 		ACCW(AR3,t_end);
248 		ACCW(AR0,t_start);
249 		ACCW(AR5,offset);
250 
251 		ACCW_YDSTLEN(yd,h+1);
252 		break;
253 	case 3: /*R->L,up*/
254 		ACCW(SGN,5);
255 
256 		ACCW(AR3,b_end);
257 		ACCW(AR0,b_start);
258 		ACCW(AR5,-offset);
259 
260 		ACCW_YDSTLEN(yd+h,h+1);
261 		break;
262 	}
263 	ACCW(FXBNDRY,((xd+w)<<16)|xd);
264 
265 	/*do the blit*/
266 	ACCGO(DWGCTL,0x040C4018); // atype RSTR
267 
268 	return B_OK;
269 }
270 
271 /* screen to screen tranparent blit - not sure what uses this.
272  * Engine function bitblit, paragraph 4.5.7.2 */
273 status_t gx00_acc_transparent_blit(uint16 xs,uint16 ys,uint16 xd,uint16 yd,uint16 w,uint16 h,uint32 colour)
274 {
275 	uint32 t_start,t_end,offset;
276 	uint32 b_start,b_end;
277 
278 	return B_ERROR;
279 
280 	/*find where the top,bottom and offset are*/
281 	offset = (si->fbc.bytes_per_row / (si->engine.depth >> 3));
282 
283 	t_end = t_start = xs + (offset*ys) + si->engine.src_dst;
284 	t_end += w;
285 
286 	b_end = b_start = xs + (offset*(ys+h)) + si->engine.src_dst;
287 	b_end +=w;
288 
289 	/* sgnzero bit _must_ be '0' before accessing SGN! */
290 	ACCW(DWGCTL,0x00000000);
291 
292 	/*find which quadrant */
293 	switch((yd>ys)|((xd>xs)<<1))
294 	{
295 	case 0: /*L->R,down*/
296 		ACCW(SGN,0);
297 
298 		ACCW(AR3,t_start);
299 		ACCW(AR0,t_end);
300 		ACCW(AR5,offset);
301 
302 		ACCW_YDSTLEN(yd,h+1);
303 		break;
304 	case 1: /*L->R,up*/
305 		ACCW(SGN,4);
306 
307 		ACCW(AR3,b_start);
308 		ACCW(AR0,b_end);
309 		ACCW(AR5,-offset);
310 
311 		ACCW_YDSTLEN(yd+h,h+1);
312 		break;
313 	case 2: /*R->L,down*/
314 		ACCW(SGN,1);
315 
316 		ACCW(AR3,t_end);
317 		ACCW(AR0,t_start);
318 		ACCW(AR5,offset);
319 
320 		ACCW_YDSTLEN(yd,h+1);
321 		break;
322 	case 3: /*R->L,up*/
323 		ACCW(SGN,5);
324 
325 		ACCW(AR3,b_end);
326 		ACCW(AR0,b_start);
327 		ACCW(AR5,-offset);
328 
329 		ACCW_YDSTLEN(yd+h,h+1);
330 		break;
331 	}
332 	ACCW(FXBNDRY,((xd+w)<<16)|xd);
333 
334 	/*do the blit*/
335 	ACCW(FCOL,colour);
336 	ACCW(BCOL,0xffffffff);
337 	ACCGO(DWGCTL,0x440C4018); // atype RSTR
338 	return B_OK;
339 }
340 
341 /* rectangle fill.
342  * Engine function rectangle_fill: paragraph 4.5.5.2 */
343 /*colorIndex,fill_rect_params,count*/
344 status_t gx00_acc_rectangle(uint32 xs,uint32 xe,uint32 ys,uint32 yl,uint32 col)
345 {
346 /*
347 	FXBNDRY - left and right coordinates    a
348 	YDSTLEN - y start and no of lines       a
349 	(or YDST and LEN)
350 	DWGCTL - atype must be RSTR or BLK      a
351 	FCOL - foreground colour                a
352 */
353 
354 	ACCW(FXBNDRY,(xe<<16)|xs); /*set x start and end*/
355 	ACCW_YDSTLEN(ys,yl); /*set y start and length*/
356 	ACCW(FCOL,col);            /*set colour*/
357 
358 //acc fixme: checkout blockmode constraints for G100+ (mil: nc?): also add blockmode
359 //	         for other functions, and use fastblt on MIL1/2 if possible...
360 //or is CMAP8 contraint a non-blockmode contraint? (linearisation problem maybe?)
361 	if (si->dm.space==B_CMAP8 || si->ps.sdram)
362 	{
363 		ACCGO(DWGCTL,0x400C7814); // atype RSTR
364 	}
365 	else
366 	{
367 		ACCGO(DWGCTL,0x400C7844); // atype BLK
368 	}
369 	return B_OK;
370 }
371 
372 /* rectangle invert.
373  * Engine function rectangle_fill: paragraph 4.5.5.2 */
374 /*colorIndex,fill_rect_params,count*/
375 status_t gx00_acc_rectangle_invert(uint32 xs,uint32 xe,uint32 ys,uint32 yl,uint32 col)
376 {
377 //	int i;
378 //	uint32 * dma;
379 //	uint32 pci;
380 /*
381 	FXBNDRY - left and right coordinates    a
382 	YDSTLEN - y start and no of lines       a
383 	(or YDST and LEN)
384 	DWGCTL - atype must be RSTR or BLK      a
385 	FCOL - foreground colour                a
386 */
387 
388 	ACCW(FXBNDRY,(xe<<16)|xs); /*set x start and end*/
389 	ACCW_YDSTLEN(ys,yl); /*set y start and length*/
390 	ACCW(FCOL,col);            /*set colour*/
391 
392 	/*draw it! top nibble is c is clipping enabled*/
393 	ACCGO(DWGCTL,0x40057814); // atype RSTR
394 
395 	/*pseudo_dma version!*/
396 //MGAACC_DWGCTL      =0x1C00,
397 //MGAACC_FCOL        =0x1C24,
398 //MGAACC_FXBNDRY     =0x1C84,
399 //MGAACC_YDSTLEN     =0x1C88,
400 //
401 //40,09,21,22 (ordered as registers)
402 
403 //	dma = (uint32 *)si->pseudo_dma;
404 //	*dma++=0x40092221;
405 //	*dma++=(xe<<16)|xs;
406 //	*dma++=(ys<<16)|yl;
407 //	*dma++=col;
408 //	*dma++=0x40057814;
409 
410 	/*real dma version!*/
411 //	dma = (vuint32 *)si->dma_buffer;
412 //	*dma++=0x40092221;/*indices*/
413 //	*dma++=(xe<<16)|xs;
414 //	*dma++=(ys<<16)|yl;
415 //	*dma++=col;
416 //	*dma++=0x40057814;
417 
418 //	pci = si->dma_buffer_pci;
419 //	ACCW(PRIMADDRESS,(pci));
420 //	ACCW(PRIMEND,(20+pci));
421 
422 //	delay(100);
423 
424 	return B_OK;
425 }
426 
427 /* screen to screen scaled filtered blit - i.e. scale video in memory.
428  * Engine function texture mapping for video, paragraphs 4.5.5.5 - 4.5.5.9 */
429 status_t gx00_acc_video_blit(uint16 xs,uint16 ys,uint16 ws, uint16 hs,
430 	uint16 xd,uint16 yd,uint16 wd,uint16 hd)
431 {
432 	//fixme: implement. Used for G450/G550 Desktop TVout...
433 	//fixme: see if MIL1 - G200 support this function as well...
434 
435 	return B_OK;
436 }
437