xref: /haiku/src/add-ons/accelerants/radeon/CP.c (revision 67bce78b48ed6d01b5a8eef89f5694c372b7e0a1)
1 /*
2 	Copyright (c) 2002, Thomas Kurschel
3 
4 
5 	Part of Radeon accelerant
6 
7 	Command Processor handling
8 */
9 
10 #include "radeon_accelerant.h"
11 #include "mmio.h"
12 #include "CPMicroCode.h"
13 #include "cp_regs.h"
14 #include "buscntrl_regs.h"
15 #include "utils.h"
16 #include <sys/ioctl.h>
17 
18 #include "log_coll.h"
19 #include "log_enum.h"
20 
21 #include <string.h>
22 
23 uint getAvailRingBuffer( accelerator_info *ai );
24 
25 
// non-local memory is used as follows:
// - 0x10000 dwords for the ring buffer
// - 8 dwords for returned data (i.e. the current read pointer)
// - 6 dwords for "scratch registers"
//
// usage of scratch registers:
// - reg 0 = reached engine.count
//
// the ring buffer data must stay at a constant offset, as
// clones cannot be informed if it were changed
36 
37 
38 // upload Micro-Code of CP
39 static void loadMicroEngineRAMData( accelerator_info *ai )
40 {
41 	int i;
42 	const uint32 (*microcode)[2];
43 
44 	SHOW_FLOW0( 3, "" );
45 
46 	switch( ai->si->asic ) {
47 	case rt_r300:
48 	case rt_r300_4p:
49 	case rt_rv350:
50 	case rt_rv360:
51 	case rt_r350:
52 	case rt_r360:
53 		microcode = r300_cp_microcode;
54 		break;
55 	case rt_r200:
56 	//case rt_rv250:
57 	//case rt_m9:
58 		microcode = r200_cp_microcode;
59 		break;
60 	default:
61 		microcode = radeon_cp_microcode;
62 	}
63 
64 	Radeon_WaitForIdle( ai );
65 
66 	OUTREG( ai->regs, RADEON_CP_ME_RAM_ADDR, 0 );
67 
68 	for ( i = 0 ; i < 256 ; i++ ) {
69 		OUTREG( ai->regs, RADEON_CP_ME_RAM_DATAH, microcode[i][1] );
70 		OUTREG( ai->regs, RADEON_CP_ME_RAM_DATAL, microcode[i][0] );
71 	}
72 }
73 
74 // convert CPU's to graphics card's virtual address
75 #define CPU2GC( addr ) (((uint32)(addr) - (uint32)si->nonlocal_mem) + si->nonlocal_vm_start)
76 
77 // initialize bus mastering
78 static status_t setupCPRegisters( accelerator_info *ai, int aring_size )
79 {
80 	vuint8 *regs = ai->regs;
81 	shared_info *si = ai->si;
82 	uint32 tmp;
83 
84 #if 0
85 	{
86 		// allocate ring buffer etc. from local memory instead of PCI memory
87 		radeon_alloc_local_mem am;
88 
89 		am.magic = RADEON_PRIVATE_DATA_MAGIC;
90 		am.size = (aring_size + 14) * 4;
91 
92 		if( ioctl( ai->fd, RADEON_ALLOC_LOCAL_MEM, &am ) != B_OK )
93 			SHOW_ERROR0( 0, "Cannot allocate ring buffer from local memory" );
94 		else {
95 			si->nonlocal_vm_start = am.fb_offset;
96 			si->nonlocal_mem = (uint32 *)(si->framebuffer + am.fb_offset);
97 		}
98 	}
99 #endif
100 
101 	memset( &si->ring, 0, sizeof( si->ring ));
102 
103 	// set write pointer delay to zero;
104 	// we assume that memory synchronization is done correctly my MoBo
105 	// and Radeon_SendCP contains a hack that hopefully fixes such problems
106 	OUTREG( regs, RADEON_CP_RB_WPTR_DELAY, 0 );
107 
108 	// setup CP buffer
109 	si->ring.start = si->nonlocal_mem;
110 	si->ring.size = aring_size;
111 	OUTREG( regs, RADEON_CP_RB_BASE, CPU2GC( si->ring.start ));
112 	SHOW_INFO( 3, "CP buffer address=%lx", CPU2GC( si->ring.start ));
113 
114 	// setup CP read pointer buffer
115 	si->ring.head = si->ring.start + si->ring.size;
116 	OUTREG( regs, RADEON_CP_RB_RPTR_ADDR, CPU2GC( si->ring.head ));
117 	SHOW_INFO( 3, "CP read pointer buffer==%lx", CPU2GC( si->ring.head ));
118 
119 	// set ring buffer size
120 	// (it's log2 of qwords)
121 	OUTREG( regs, RADEON_CP_RB_CNTL, log2( si->ring.size / 2 ));
122 	SHOW_INFO( 3, "CP buffer size mask=%ld", log2( si->ring.size / 2 ) );
123 
124 	// set CP buffer pointers
125 	OUTREG( regs, RADEON_CP_RB_RPTR, 0 );
126 	OUTREG( regs, RADEON_CP_RB_WPTR, 0 );
127 	*si->ring.head = 0;
128 	si->ring.tail = 0;
129 
130 	// setup scratch register buffer
131 	si->scratch_ptr = si->ring.head + RADEON_SCRATCH_REG_OFFSET / sizeof( uint32 );
132 	OUTREG( regs, RADEON_SCRATCH_ADDR, CPU2GC( si->scratch_ptr ));
133 	OUTREG( regs, RADEON_SCRATCH_UMSK, 0x3f );
134 
135 	Radeon_WaitForIdle( ai );
136 
137 	// enable bus mastering
138 #if 1
139 	tmp = INREG( ai->regs, RADEON_BUS_CNTL ) & ~RADEON_BUS_MASTER_DIS;
140 	OUTREG( regs, RADEON_BUS_CNTL, tmp );
141 #endif
142 
143 	// sync units
144 	OUTREG( regs, RADEON_ISYNC_CNTL,
145 		(RADEON_ISYNC_ANY2D_IDLE3D |
146 		 RADEON_ISYNC_ANY3D_IDLE2D |
147 		 RADEON_ISYNC_WAIT_IDLEGUI |
148 		 RADEON_ISYNC_CPSCRATCH_IDLEGUI) );
149 
150 	return B_OK;
151 }
152 
153 
154 // get number of free entries in CP's ring buffer
155 uint getAvailRingBuffer( accelerator_info *ai )
156 {
157 	shared_info *si = ai->si;
158 	int space;
159 
160 //	space = *si->ring.head - si->ring.tail;
161 	space = INREG( ai->regs, RADEON_CP_RB_RPTR ) - si->ring.tail;
162 
163 	if( space <= 0 )
164 		space += si->ring.size;
165 
166 	// don't fill up the entire buffer as we cannot
167 	// distinguish between a full and an empty ring
168 	--space;
169 
170 	SHOW_FLOW( 4, "head=%ld, tail=%ld, space=%ld", *si->ring.head, si->ring.tail, space );
171 
172 	LOG1( si->log, _GetAvailRingBufferQueue, space );
173 
174 	return space;
175 }
176 
177 // initialize CP so it's ready for BM
178 status_t Radeon_InitCP( accelerator_info *ai )
179 {
180 //	shared_info *si = ai->si;
181 	status_t result;
182 
183 	SHOW_FLOW0( 3, "" );
184 
185 	// init raw CP
186 	loadMicroEngineRAMData( ai );
187 
188 	// do soft-reset
189 	Radeon_ResetEngine( ai );
190 
191 	// after warm-reset, the CP may still be active and thus react to
192 	// register writes during initialization unpredictably, so we better
193 	// stop it first
194 	OUTREG( ai->regs, RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIDIS_INDDIS );
195 	INREG( ai->regs, RADEON_CP_CSQ_CNTL );
196 
197 	// reset CP to make disabling active
198 	Radeon_ResetEngine( ai );
199 
200 	// setup CP memory ranges
201 	result = setupCPRegisters( ai, 0x10000 );
202 	if( result < 0 )
203 		return result;
204 
205 	// tell CP to use BM
206 	Radeon_WaitForIdle( ai );
207 	OUTREG( ai->regs, RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM );
208 
209 	// this may be a bit too much
210 	Radeon_SendPurgeCache( ai );
211 	Radeon_SendWaitUntilIdle( ai );
212 
213 	return B_OK;
214 }
215 
216 
217 // write to register via CP
218 void Radeon_WriteRegCP( accelerator_info *ai, uint32 reg, uint32 value )
219 {
220 	uint32 buffer[2];
221 
222 	SHOW_FLOW0( 4, "" );
223 
224 	LOG2( ai->si->log, _Radeon_WriteRegFifo, reg, value );
225 
226 	buffer[0] = CP_PACKET0( reg, 0 );
227 	buffer[1] = value;
228 
229 	Radeon_SendCP( ai, buffer, 2 );
230 }
231 
232 
233 // send packets to CP
234 void Radeon_SendCP( accelerator_info *ai, uint32 *buffer, uint32 num_dwords )
235 {
236 	shared_info *si = ai->si;
237 
238 	SHOW_FLOW( 4, "num_dwords=%d", num_dwords );
239 
240 	while( num_dwords > 0 ) {
241 		uint32 space;
242 		uint32 max_copy;
243 //		uint i;
244 
245 		space = getAvailRingBuffer( ai );
246 
247 		if( space == 0 )
248 			continue;
249 
250 		max_copy = min( space, num_dwords );
251 
252 #ifdef ENABLE_LOGGING
253 		for( i = 0; i < max_copy; ++i )
254 			LOG1( si->log, _Radeon_SendCP, buffer[i] );
255 #endif
256 
257 		if( si->ring.tail + max_copy >= si->ring.size ) {
258 			uint32 sub_len;
259 
260 			sub_len = si->ring.size - si->ring.tail;
261 			memcpy( si->ring.start + si->ring.tail, buffer, sub_len * sizeof( uint32 ));
262 			buffer += sub_len;
263 			num_dwords -= sub_len;
264 			max_copy -= sub_len;
265 			si->ring.tail = 0;
266 		}
267 
268 		memcpy( si->ring.start + si->ring.tail, buffer, max_copy * sizeof( uint32 ) );
269 		buffer += max_copy;
270 		num_dwords -= max_copy;
271 		if( si->ring.tail + max_copy < si->ring.size )
272 			si->ring.tail += max_copy;
273 		else
274 			si->ring.tail = 0;
275 	}
276 
277 	// some chipsets have problems with write buffers; effectively, the command
278 	// list we've just created gets delayed in some queue and the graphics chip
279 	// reads out-dated commands, which don't make sense and thus crash the
280 	// graphics card
281 
282 	// flush writes to ring
283 	// (this code is a bit of a overkill - currently, only some WinChip/Cyrix
284 	//  CPU's support out-of-order writes, but we are prepared)
285 	__asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory");
286 	// make sure the chipset has flushed its write buffer by
287 	// reading some uncached memory
288 	(void)*si->ring.head;
289 
290 	// now, the command list should really be written to memory,
291 	// so it's safe to instruct the graphics card to read it
292 	OUTREG( ai->regs, RADEON_CP_RB_WPTR, si->ring.tail );
293 
294 	// read from PCI bus to ensure correct posting
295 	INREG( ai->regs, RADEON_CP_RB_RPTR );
296 }
297