xref: /haiku/src/add-ons/kernel/drivers/graphics/radeon/CP_setup.c (revision cc7e844c12cbb4d60c80edac08a503d5cf872929)
1 /*
2 	Copyright (c) 2002, Thomas Kurschel
3 
4 
5 	Part of Radeon accelerant
6 
7 	CP initialization/sync/cleanup.
8 
9 	It also handles command buffer synchronization.
10 
11 	non-local memory is used as following:
12 	- 2048 dwords for ring buffer
	- 253 indirect buffers of 4 KB each (1024 dwords)
14 	- 8 dwords for returned data (i.e. current read ptr)
15 	  & 6 dwords for "scratch registers"
16 
17 	usage of scratch registers:
18 	- reg 0 = reached engine.count
19 
20 	with a granularity of 4 KByte, we need 2+253+1=256 blocks, which is exactly 1 MB
21 */
22 
23 #include "radeon_driver.h"
24 #include "CPMicroCode.h"
25 #include "mmio.h"
26 #include "cp_regs.h"
27 #include "pll_regs.h"
28 #include "rbbm_regs.h"
29 #include "buscntrl_regs.h"
30 #include "config_regs.h"
31 #include "memcntrl_regs.h"
32 #include "utils.h"
33 #include "pll_access.h"
34 
35 #include "log_coll.h"
36 #include "log_enum.h"
37 
38 #include <string.h>
39 
40 #if 0
41 
42 // macros for user-space
43 
// allocate "asize" bytes of graphics memory (same contract as the
// kernel-space variant below); sets "res" to the ioctl result and, on
// success, stores the allocation's handle and offset through the
// "handle"/"offset" pointers.
// fixes vs. previous version:
//  - the two result stores are now inside the if-body (offset was
//    written even on failure)
//  - the requested memory type is passed on instead of being
//    hardcoded to mt_nonlocal
//  - asize is taken in bytes, matching all call sites in this file
//    (which pass dword_count * 4 already)
#define ALLOC_MEM( asize, mem_type, aglobal, handle, offset ) \
	do { \
		radeon_alloc_mem am; \
\
		am.magic = RADEON_PRIVATE_DATA_MAGIC; \
		am.size = (asize); \
		am.memory_type = (mem_type); \
		am.global = (aglobal); \
\
		res = ioctl( ai->fd, RADEON_ALLOC_MEM, &am ); \
		if( res == B_OK ) { \
			*(handle) = am.handle; \
			*(offset) = am.offset; \
		} \
	} while( 0 )
58 
59 #define MEM2CPU( mem ) \
60 	((uint32 *)(ai->mapped_memory[(mem).memory_type].data + (mem).offset))
61 
62 #define MEM2GC( mem ) ((mem).offset + si->memory[(mem).memory_type].virtual_addr_start)
63 
// free a graphics memory block previously allocated with ALLOC_MEM;
// "handle" is the allocation handle returned by ALLOC_MEM.
// fix: fm.handle was assigned from "offset", which is not even a
// parameter of this macro (would only compile by accident when a
// local named "offset" happened to be in scope, and would then free
// the wrong handle)
#define FREE_MEM( mem_type, handle ) \
	do { \
		radeon_free_mem fm; \
\
		fm.magic = RADEON_PRIVATE_DATA_MAGIC; \
		fm.memory_type = mem_type; \
		fm.handle = handle; \
\
		ioctl( ai->fd, RADEON_FREE_MEM, &fm ); \
	} while( 0 )
74 
75 #else
76 
77 // macros for kernel-space
78 
// allocate memory
// if memory_type is non-local, it is replaced with default non-local type;
// sets "res" to the result of mem_alloc().
// wrapped in do/while(0) so the macro expands to exactly one statement -
// the previous bare if/assignment pair would misbehave inside an
// unbraced if/else at a call site
#define ALLOC_MEM( asize, mem_type, aglobal, handle, offset ) \
	do { \
		if( mem_type == mt_nonlocal ) \
			mem_type = di->si->nonlocal_type; \
		res = mem_alloc( di->memmgr[mem_type], asize, NULL, handle, offset ); \
	} while( 0 )
85 
86 // get address as seen by program to access allocated memory
87 // (memory_type must _not_ be non-local, see ALLOC_MEM)
88 #define MEM2CPU( memory_type, offset ) \
89 	((uint8 *)(memory_type == mt_local ? di->si->local_mem : \
90 	(memory_type == mt_PCI ? di->pci_gart.buffer.ptr : di->agp_gart.buffer.ptr)) \
91 	+ (offset))
92 
93 // get graphics card's virtual address of allocated memory
94 // (memory_type must _not_ be non-local, see ALLOC_MEM)
95 #define MEM2GC( memory_type, offset ) \
96 	(di->si->memory[(memory_type)].virtual_addr_start + (offset))
97 
// free memory
// if memory_type is non-local, it is replaced with default non-local type.
// wrapped in do/while(0) so the expansion plus the caller's semicolon
// forms exactly one statement (the old trailing ';' inside the macro
// produced an extra empty statement and an if/else hazard)
#define FREE_MEM( mem_type, handle ) \
	do { \
		mem_free( \
			di->memmgr[ mem_type == mt_nonlocal ? di->si->nonlocal_type : mem_type], \
			handle, NULL ); \
	} while( 0 )
104 
105 #endif
106 
107 
108 void Radeon_DiscardAllIndirectBuffers( device_info *di );
109 
110 #define RADEON_SCRATCH_REG_OFFSET	32
111 
112 
113 void Radeon_FlushPixelCache( device_info *di );
114 
// wait until engine is idle, resetting it if it hangs;
// acquire_lock - 	true, if lock must be held (it is acquired here)
//					false, if lock is already acquired by the caller
// keep_lock -		true, keep lock on exit (only valid if acquire_lock is true)
void Radeon_WaitForIdle( device_info *di, bool acquire_lock, bool keep_lock )
{
	if( acquire_lock )
		ACQUIRE_BEN( di->si->cp.lock );

	// first let the command FIFO drain
	Radeon_WaitForFifo( di, 64 );

	while( 1 ) {
		bigtime_t start_time = system_time();

		// poll the engine-active bit for up to one second
		do {
			if( (INREG( di->regs, RADEON_RBBM_STATUS ) & RADEON_RBBM_ACTIVE) == 0 ) {
				// engine reports idle - flush the pixel cache before returning
				Radeon_FlushPixelCache( di );

				if( acquire_lock && !keep_lock)
					RELEASE_BEN( di->si->cp.lock );

				return;
			}

			snooze( 1 );
		} while( system_time() - start_time < 1000000 );

		// timed out: the engine seems hung - dump its state,
		// reset it and try waiting again
		SHOW_ERROR( 3,
			"Engine didn't become idle (rbbm_status=%" B_PRIx32 ", "
			"cp_stat=%" B_PRIx32 ", "
			"tlb_address=%" B_PRIx32 ", "
			"tlb_data=%" B_PRIx32 ")",
			INREG( di->regs, RADEON_RBBM_STATUS ),
			INREG( di->regs, RADEON_CP_STAT ),
			INREG( di->regs, RADEON_AIC_TLB_ADDR ),
			INREG( di->regs, RADEON_AIC_TLB_DATA ));

		LOG( di->si->log, _Radeon_WaitForIdle );

		Radeon_ResetEngine( di );
	}
}
157 
158 
159 // wait until "entries" FIFO entries are empty
160 // lock must be hold
Radeon_WaitForFifo(device_info * di,int entries)161 void Radeon_WaitForFifo( device_info *di, int entries )
162 {
163 	while( 1 ) {
164 		bigtime_t start_time = system_time();
165 
166 		do {
167 			int slots = INREG( di->regs, RADEON_RBBM_STATUS ) & RADEON_RBBM_FIFOCNT_MASK;
168 
169 			if ( slots >= entries )
170 				return;
171 
172 			snooze( 1 );
173 		} while( system_time() - start_time < 1000000 );
174 
175 		LOG( di->si->log, _Radeon_WaitForFifo );
176 
177 		Radeon_ResetEngine( di );
178 	}
179 }
180 
181 // flush pixel cache of graphics card
Radeon_FlushPixelCache(device_info * di)182 void Radeon_FlushPixelCache( device_info *di )
183 {
184 	bigtime_t start_time;
185 
186 	OUTREGP( di->regs, RADEON_RB2D_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL,
187 		~RADEON_RB2D_DC_FLUSH_ALL );
188 
189 	start_time = system_time();
190 
191 	do {
192 		if( (INREG( di->regs, RADEON_RB2D_DSTCACHE_CTLSTAT )
193 			 & RADEON_RB2D_DC_BUSY) == 0 )
194 			return;
195 
196 		snooze( 1 );
197 	} while( system_time() - start_time < 1000000 );
198 
199 	LOG( di->si->log, _Radeon_FlushPixelCache );
200 
201 	SHOW_ERROR0( 0, "pixel cache didn't become empty" );
202 }
203 
// reset graphics card's engine
// lock must be held
void Radeon_ResetEngine( device_info *di )
{
	vuint8 *regs = di->regs;
	shared_info *si = di->si;
	uint32 clock_cntl_index, mclk_cntl, rbbm_soft_reset, host_path_cntl;
	uint32 cur_read_ptr;

	SHOW_FLOW0( 3, "" );

	Radeon_FlushPixelCache( di );

	// save clock state that is clobbered during the reset sequence
	clock_cntl_index = INREG( regs, RADEON_CLOCK_CNTL_INDEX );
	RADEONPllErrataAfterIndex( regs, di->asic );	// drm has no errata here!
	mclk_cntl = Radeon_INPLL( regs, di->asic, RADEON_MCLK_CNTL );

	// enable clock of units to be reset
	Radeon_OUTPLL( regs, di->asic, RADEON_MCLK_CNTL, mclk_cntl |
      RADEON_FORCEON_MCLKA |
      RADEON_FORCEON_MCLKB |
      RADEON_FORCEON_YCLKA |
      RADEON_FORCEON_YCLKB |
      RADEON_FORCEON_MC |
      RADEON_FORCEON_AIC );

	// do the reset
    host_path_cntl = INREG( regs, RADEON_HOST_PATH_CNTL );
	rbbm_soft_reset = INREG( regs, RADEON_RBBM_SOFT_RESET );

	// assert soft reset for all engine units ...
	OUTREG( regs, RADEON_RBBM_SOFT_RESET, (rbbm_soft_reset |
			RADEON_SOFT_RESET_CP |
			RADEON_SOFT_RESET_HI |
			RADEON_SOFT_RESET_SE |
			RADEON_SOFT_RESET_RE |
			RADEON_SOFT_RESET_PP |
			RADEON_SOFT_RESET_E2 |
			RADEON_SOFT_RESET_RB ) );
	// read back to flush the posted write before deasserting
	INREG( regs, RADEON_RBBM_SOFT_RESET);
	// ... and deassert it again
	OUTREG( regs, RADEON_RBBM_SOFT_RESET, rbbm_soft_reset &
		~( RADEON_SOFT_RESET_CP |
		   RADEON_SOFT_RESET_HI |
		   RADEON_SOFT_RESET_SE |
		   RADEON_SOFT_RESET_RE |
		   RADEON_SOFT_RESET_PP |
		   RADEON_SOFT_RESET_E2 |
		   RADEON_SOFT_RESET_RB ) );
	INREG( regs, RADEON_RBBM_SOFT_RESET);

	// pulse the host data path soft reset as well
    OUTREG( regs, RADEON_HOST_PATH_CNTL, host_path_cntl | RADEON_HDP_SOFT_RESET );
    INREG( regs, RADEON_HOST_PATH_CNTL );
    OUTREG( regs, RADEON_HOST_PATH_CNTL, host_path_cntl );

	// restore the saved clock state
	Radeon_OUTPLL( regs, di->asic, RADEON_MCLK_CNTL, mclk_cntl );
   	OUTREG( regs, RADEON_CLOCK_CNTL_INDEX, clock_cntl_index );
   	//RADEONPllErrataAfterIndex( regs, di->asic ); // drm doesn't do this here!
   	OUTREG( regs, RADEON_RBBM_SOFT_RESET, rbbm_soft_reset);

	if ( di->acc_dma )
	{
		// reset ring buffer: make the write pointer equal the read
		// pointer so the ring appears empty
		cur_read_ptr = INREG( regs, RADEON_CP_RB_RPTR );
		OUTREG( regs, RADEON_CP_RB_WPTR, cur_read_ptr );

		//if( si->cp.ring.head ) {
		// during init, there are no feedback data
		if( si->cp.feedback.mem_handle != 0 ) {
			// keep the CPU-visible head copy and our tail in sync
			// with the hardware read pointer
			*(uint32 *)MEM2CPU( si->cp.feedback.mem_type, si->cp.feedback.head_mem_offset) =
				cur_read_ptr;
			//	*si->cp.ring.head = cur_read_ptr;
			si->cp.ring.tail = cur_read_ptr;
		}

		// mark all buffers as being finished
		Radeon_DiscardAllIndirectBuffers( di );
	}

	// bump the engine counter (see scratch register usage in the
	// file header - reg 0 reports the reached engine.count)
	++si->engine.count;
	return;
}
284 
285 
286 // upload Micro-Code of CP
loadMicroEngineRAMData(device_info * di)287 static void loadMicroEngineRAMData( device_info *di )
288 {
289 	int i;
290 	const uint32 (*microcode)[2];
291 
292 	SHOW_FLOW0( 3, "" );
293 
294 	switch( di->asic ) {
295 	case rt_r300:
296 	case rt_rv350:
297 	case rt_r350:
298 	case rt_rv380:
299 	case rt_r420:
300 		microcode = r300_cp_microcode;
301 		break;
302 	case rt_r200:
303 		microcode = r200_cp_microcode;
304 		break;
305 	case rt_rs100:
306 	default:
307 		microcode = radeon_cp_microcode;
308 	}
309 
310 	Radeon_WaitForIdle( di, false, false );
311 
312 	OUTREG( di->regs, RADEON_CP_ME_RAM_ADDR, 0 );
313 
314 	for ( i = 0 ; i < 256 ; i++ ) {
315 		OUTREG( di->regs, RADEON_CP_ME_RAM_DATAH, microcode[i][1] );
316 		OUTREG( di->regs, RADEON_CP_ME_RAM_DATAL, microcode[i][0] );
317 	}
318 }
319 
// allocate and program the CP ring buffer
// aring_size - size of ring in dwords
//              (assumed to be a power of two - tail_mask relies on it)
static status_t initRingBuffer( device_info *di, int aring_size )
{
	status_t res;
	shared_info *si = di->si;
	CP_info *cp = &si->cp;
	vuint8 *regs = di->regs;
	int32 offset;
	memory_type_e memory_type;

	memset( &cp->ring, 0, sizeof( cp->ring ));

	// ring and indirect buffers can be either in AGP or PCI GART
	// (it seems that they cannot be in graphics memory, at least
	//  I had serious coherency problems when I tried that)
	memory_type = mt_nonlocal;

	ALLOC_MEM( aring_size * 4, memory_type, true,
		&cp->ring.mem_handle, &offset );

	if( res != B_OK ) {
		SHOW_ERROR0( 0, "Cannot allocate ring buffer" );
		return res;
	}

	// setup CP buffer
	cp->ring.mem_type = memory_type;
	cp->ring.mem_offset = offset;
	cp->ring.vm_base = MEM2GC( memory_type, offset );
	cp->ring.size = aring_size;
	cp->ring.tail_mask = aring_size - 1;
	OUTREG( regs, RADEON_CP_RB_BASE, cp->ring.vm_base );
	SHOW_INFO( 3, "CP buffer address=%" B_PRIx32, cp->ring.vm_base );

	// set ring buffer size
	// (it's log2 of qwords)
	OUTREG( regs, RADEON_CP_RB_CNTL, radeon_log2( cp->ring.size / 2 ));
	SHOW_INFO( 3, "CP buffer size mask=%d", radeon_log2( cp->ring.size / 2 ) );

	// set write pointer delay to zero;
	// we assume that memory synchronization is done correctly by the MoBo
	// and Radeon_SendCP contains a hack that hopefully fixes such problems
	OUTREG( regs, RADEON_CP_RB_WPTR_DELAY, 0 );

	// start with a zeroed ring
	memset( MEM2CPU( cp->ring.mem_type, cp->ring.mem_offset), 0, cp->ring.size * 4 );

	// set CP buffer pointers
	OUTREG( regs, RADEON_CP_RB_RPTR, 0 );
	OUTREG( regs, RADEON_CP_RB_WPTR, 0 );
	//*cp->ring.head = 0;
	cp->ring.tail = 0;

	return B_OK;
}
374 
// tear down the CP ring buffer and free its memory
static void uninitRingBuffer( device_info *di )
{
	vuint8 *regs = di->regs;

	// stop the engine before pulling the ring out from under it
	Radeon_ResetEngine( di );

	// disable primary and indirect command queues (no CP bus mastering)
	OUTREG( regs, RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIDIS_INDDIS );
	// read back so the posted write is flushed
	INREG( regs, RADEON_CP_CSQ_CNTL );

	FREE_MEM( mt_nonlocal, di->si->cp.ring.mem_handle );
}
389 
// allocate and program the CP feedback area: a CPU-visible copy of the
// ring read pointer, followed by the scratch registers
static status_t initCPFeedback( device_info *di )
{
	CP_info *cp = &di->si->cp;
	vuint8 *regs = di->regs;
	int32 offset;
	memory_type_e memory_type;
	status_t res;

	// status information should be in PCI memory, so CPU can
	// poll it without locking the bus (PCI memory is the only
	// cachable memory available)
	memory_type = mt_PCI;

	// single block: read pointer copy at the start, scratch registers
	// at RADEON_SCRATCH_REG_OFFSET
	ALLOC_MEM( RADEON_SCRATCH_REG_OFFSET + 0x40, memory_type, true,
		&cp->feedback.mem_handle, &offset );

	if( res != B_OK ) {
		SHOW_ERROR0( 0, "Cannot allocate buffers for status information" );
		return res;
	}

	// setup CP read pointer buffer
	cp->feedback.mem_type = memory_type;
	cp->feedback.head_mem_offset = offset;
	cp->feedback.head_vm_address = MEM2GC( memory_type, cp->feedback.head_mem_offset );
	OUTREG( regs, RADEON_CP_RB_RPTR_ADDR, cp->feedback.head_vm_address );
	SHOW_INFO( 3, "CP read pointer buffer==%" B_PRIx32,
		cp->feedback.head_vm_address );

	// setup scratch register buffer
	cp->feedback.scratch_mem_offset = offset + RADEON_SCRATCH_REG_OFFSET;
	cp->feedback.scratch_vm_start = MEM2GC( memory_type, cp->feedback.scratch_mem_offset );
	OUTREG( regs, RADEON_SCRATCH_ADDR, cp->feedback.scratch_vm_start );
	// unmask all six scratch registers so the CP may write them back
	OUTREG( regs, RADEON_SCRATCH_UMSK, 0x3f );

	// start with clean feedback data
	*(uint32 *)MEM2CPU( cp->feedback.mem_type, cp->feedback.head_mem_offset) = 0;
	memset( MEM2CPU( cp->feedback.mem_type, cp->feedback.scratch_mem_offset), 0, 0x40 );
	//*cp->ring.head = 0;

	return B_OK;
}
431 
// stop CP feedback writes and release the feedback area
static void uninitCPFeedback( device_info *di )
{
	vuint8 *regs = di->regs;

	// mask all scratch registers - the CP must not write
	// into memory we are about to free
	OUTREG( regs, RADEON_SCRATCH_UMSK, 0x0 );

	FREE_MEM( mt_PCI, di->si->cp.feedback.mem_handle );
}
441 
initIndirectBuffers(device_info * di)442 static status_t initIndirectBuffers( device_info *di )
443 {
444 	CP_info *cp = &di->si->cp;
445 	int32 offset;
446 	memory_type_e memory_type;
447 	int i;
448 	status_t res;
449 
450 	memory_type = mt_nonlocal;
451 
452 	ALLOC_MEM( NUM_INDIRECT_BUFFERS * INDIRECT_BUFFER_SIZE * 4, memory_type,
453 		true, &cp->buffers.mem_handle, &offset );
454 
455 	if( res != B_OK ) {
456 		SHOW_ERROR0( 0, "Cannot allocate indirect buffers" );
457 		return B_ERROR;
458 	}
459 
460 	cp->buffers.mem_type = memory_type;
461 	cp->buffers.mem_offset = offset;
462 	cp->buffers.vm_start = MEM2GC( memory_type, cp->buffers.mem_offset );
463 
464 	for( i = 0; i < NUM_INDIRECT_BUFFERS - 1; ++i ) {
465 		cp->buffers.buffers[i].next = i + 1;
466 	}
467 
468 	cp->buffers.buffers[i].next = -1;
469 
470 	cp->buffers.free_list = 0;
471 	cp->buffers.oldest = -1;
472 	cp->buffers.newest = -1;
473 	cp->buffers.active_state = -1;
474 	cp->buffers.cur_tag = 0;
475 
476 	memset( MEM2CPU( cp->buffers.mem_type, cp->buffers.mem_offset), 0,
477 		NUM_INDIRECT_BUFFERS * INDIRECT_BUFFER_SIZE * 4 );
478 
479 	return B_OK;
480 }
481 
// free the indirect-buffer pool allocated by initIndirectBuffers()
static void uninitIndirectBuffers( device_info *di )
{
	FREE_MEM( mt_nonlocal, di->si->cp.buffers.mem_handle );
}
486 
// initialize CP so it's ready for BM (bus mastering);
// on success the CP lock exists and, if di->acc_dma is set, ring buffer,
// feedback area and indirect buffers are allocated and the CP is running
status_t Radeon_InitCP( device_info *di )
{
	thread_id thid;
    thread_info thinfo;
	status_t res;

	SHOW_FLOW0( 3, "" );

	// this is _really_ necessary so functions like ResetEngine() know
	// that the CP is not set up yet
	memset( &di->si->cp, 0, sizeof( di->si->cp ));

	if( (res = INIT_BEN( di->si->cp.lock, "Radeon CP" )) < 0 )
		return res;

	// HACK: change owner of benaphore semaphore to team of calling thread;
	// reason: user code cannot acquire kernel semaphores, but the accelerant
	// is in user space; interestingly, it's enough to change the semaphore's
	// owner to _any_ non-system team (that's the only security check done by
	// the kernel)
	thid = find_thread( NULL );
    get_thread_info( thid, &thinfo );
    set_sem_owner( di->si->cp.lock.sem, thinfo.team );

	// init raw CP
	if ( di->acc_dma ) loadMicroEngineRAMData( di );

	// do soft-reset
	Radeon_ResetEngine( di );

	// after warm-reset, the CP may still be active and thus react to
	// register writes during initialization unpredictably, so we better
	// stop it first
	OUTREG( di->regs, RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIDIS_INDDIS );
	// read-back for flushing
	INREG( di->regs, RADEON_CP_CSQ_CNTL );

	// reset CP to make disabling active
	Radeon_ResetEngine( di );

	if ( di->acc_dma )
	{
		res = initRingBuffer( di, CP_RING_SIZE );
		if( res < 0 )
			goto err4;

		res = initCPFeedback( di );
		if( res < 0 )
			goto err3;

		res = initIndirectBuffers( di );
		if( res < 0 )
			goto err2;

		// tell CP to use BM
		Radeon_WaitForIdle( di, false, false );

		// enable direct and indirect CP bus mastering
		OUTREG( di->regs, RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM );

		// allow bus mastering in general
		OUTREGP( di->regs, RADEON_BUS_CNTL, 0, ~RADEON_BUS_MASTER_DIS );
	}


	// don't allow mixing of 2D/3D/scratch/wait_until commands
	// (in fact, this doesn't seem to make any difference as we do a
	// manual sync in all these cases anyway)
	OUTREG( di->regs, RADEON_ISYNC_CNTL,
		RADEON_ISYNC_ANY2D_IDLE3D |
		RADEON_ISYNC_ANY3D_IDLE2D |
		RADEON_ISYNC_WAIT_IDLEGUI |
		RADEON_ISYNC_CPSCRATCH_IDLEGUI );

	SHOW_FLOW( 3, "bus_cntl=%" B_PRIx32, INREG( di->regs, RADEON_BUS_CNTL ));

	SHOW_FLOW0( 3, "Done" );

	return B_OK;

// error unwind: release everything acquired so far, in reverse order
//err:
//	uninitIndirectBuffers( ai );
err2:
	uninitCPFeedback( di );
err3:
	uninitRingBuffer( di );
err4:
	DELETE_BEN( di->si->cp.lock );
	return res;
}
577 
578 
579 // shutdown CP, freeing any memory
Radeon_UninitCP(device_info * di)580 void Radeon_UninitCP( device_info *di )
581 {
582 	vuint8 *regs = di->regs;
583 
584 	// abort any pending commands
585 	Radeon_ResetEngine( di );
586 
587 	// disable CP BM
588 	OUTREG( regs, RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIDIS_INDDIS );
589 	// read-back for flushing
590 	INREG( regs, RADEON_CP_CSQ_CNTL );
591 
592 	if ( di->acc_dma )
593 	{
594 		uninitRingBuffer( di );
595 		uninitCPFeedback( di );
596 		uninitIndirectBuffers( di );
597 	}
598 
599 	DELETE_BEN( di->si->cp.lock );
600 }
601 
602 
603 // mark all indirect buffers as being free;
604 // this should only be called after a reset;
605 // lock must be hold
Radeon_DiscardAllIndirectBuffers(device_info * di)606 void Radeon_DiscardAllIndirectBuffers( device_info *di )
607 {
608 	CP_info *cp = &di->si->cp;
609 
610 	// during init, there is no indirect buffer
611 	if( cp->buffers.mem_handle == 0 )
612 		return;
613 
614 	// mark all sent indirect buffers as free
615 	while( cp->buffers.oldest != -1 ) {
616 		indirect_buffer *oldest_buffer =
617 			&cp->buffers.buffers[cp->buffers.oldest];
618 		int tmp_oldest_buffer;
619 
620 		SHOW_FLOW( 0, "%d", cp->buffers.oldest );
621 
622 		// remove buffer from "used" list
623 		tmp_oldest_buffer = oldest_buffer->next;
624 
625 		if( tmp_oldest_buffer == -1 )
626 			cp->buffers.newest = -1;
627 
628 		// put it on free list
629 		oldest_buffer->next = cp->buffers.free_list;
630 		cp->buffers.free_list = cp->buffers.oldest;
631 
632 		cp->buffers.oldest = tmp_oldest_buffer;
633 	}
634 }
635 
// lets hide this in here, as it's got lots of lovely register headers already...
// does it go here, or in the accelerant anyway?
// for now i'm assuming you turn on dynamic clocks, and they take care of themselves onwards...
// so doing it at driver init seems sensible after a valid detection of course...
//
// mode 0 - disable dynamic clock scaling (force all clocks on)
// mode 1 - enable dynamic clock scaling
// any other mode is ignored
// (the register sequences mirror the X.org radeon driver's
// RADEONSetDynamicClock; the snooze() delays between PLL writes
// are part of that sequence and must not be removed)
void Radeon_SetDynamicClock( device_info *di, int mode)
{
    vuint8 *regs = di->regs;
    radeon_type asic = di->asic;
    uint32 tmp;

    switch(mode) {
	case 0: /* Turn everything OFF (ForceON to everything)*/
		if ( di->num_crtc != 2 ) {
			// single-CRTC chips: one big force-on write
			tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
			tmp |= (RADEON_SCLK_FORCE_CP   | RADEON_SCLK_FORCE_HDP |
				RADEON_SCLK_FORCE_DISP1 | RADEON_SCLK_FORCE_TOP |
				RADEON_SCLK_FORCE_E2   | RADEON_SCLK_FORCE_SE  |
				RADEON_SCLK_FORCE_IDCT | RADEON_SCLK_FORCE_VIP |
				RADEON_SCLK_FORCE_RE   | RADEON_SCLK_FORCE_PB  |
				RADEON_SCLK_FORCE_TAM  | RADEON_SCLK_FORCE_TDM |
				RADEON_SCLK_FORCE_RB);
			Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);
		} else if (asic == rt_rv350) {
			/* for RV350/M10, no delays are required. */
			tmp = Radeon_INPLL(regs, asic, R300_SCLK_CNTL2);
			tmp |= (R300_SCLK_FORCE_TCL |
				R300_SCLK_FORCE_GA  |
				R300_SCLK_FORCE_CBA);
			Radeon_OUTPLL(regs, asic, R300_SCLK_CNTL2, tmp);

			tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
			tmp |= (RADEON_SCLK_FORCE_DISP2 | RADEON_SCLK_FORCE_CP      |
				RADEON_SCLK_FORCE_HDP   | RADEON_SCLK_FORCE_DISP1   |
				RADEON_SCLK_FORCE_TOP   | RADEON_SCLK_FORCE_E2      |
				R300_SCLK_FORCE_VAP     | RADEON_SCLK_FORCE_IDCT    |
				RADEON_SCLK_FORCE_VIP   | R300_SCLK_FORCE_SR        |
				R300_SCLK_FORCE_PX      | R300_SCLK_FORCE_TX        |
				R300_SCLK_FORCE_US      | RADEON_SCLK_FORCE_TV_SCLK |
				R300_SCLK_FORCE_SU      | RADEON_SCLK_FORCE_OV0);
			Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);

			tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_MORE_CNTL);
			tmp |= RADEON_SCLK_MORE_FORCEON;
			Radeon_OUTPLL(regs, asic, RADEON_SCLK_MORE_CNTL, tmp);

			tmp = Radeon_INPLL(regs, asic, RADEON_MCLK_CNTL);
			tmp |= (RADEON_FORCEON_MCLKA |
				RADEON_FORCEON_MCLKB |
				RADEON_FORCEON_YCLKA |
				RADEON_FORCEON_YCLKB |
				RADEON_FORCEON_MC);
			Radeon_OUTPLL(regs, asic, RADEON_MCLK_CNTL, tmp);

			// disable dynamic mode for the display clocks too
			tmp = Radeon_INPLL(regs, asic, RADEON_VCLK_ECP_CNTL);
			tmp &= ~(RADEON_PIXCLK_ALWAYS_ONb  |
				RADEON_PIXCLK_DAC_ALWAYS_ONb |
			R300_DISP_DAC_PIXCLK_DAC_BLANK_OFF);
			Radeon_OUTPLL(regs, asic, RADEON_VCLK_ECP_CNTL, tmp);

			tmp = Radeon_INPLL(regs, asic, RADEON_PIXCLKS_CNTL);
			tmp &= ~(RADEON_PIX2CLK_ALWAYS_ONb         |
				RADEON_PIX2CLK_DAC_ALWAYS_ONb     |
				RADEON_DISP_TVOUT_PIXCLK_TV_ALWAYS_ONb |
				R300_DVOCLK_ALWAYS_ONb            |
				RADEON_PIXCLK_BLEND_ALWAYS_ONb    |
				RADEON_PIXCLK_GV_ALWAYS_ONb       |
				R300_PIXCLK_DVO_ALWAYS_ONb        |
				RADEON_PIXCLK_LVDS_ALWAYS_ONb     |
				RADEON_PIXCLK_TMDS_ALWAYS_ONb     |
				R300_PIXCLK_TRANS_ALWAYS_ONb      |
				R300_PIXCLK_TVO_ALWAYS_ONb        |
				R300_P2G2CLK_ALWAYS_ONb            |
				R300_P2G2CLK_DAC_ALWAYS_ONb           |
				R300_DISP_DAC_PIXCLK_DAC2_BLANK_OFF);
			Radeon_OUTPLL(regs, asic, RADEON_PIXCLKS_CNTL, tmp);
		}  else {
			// all other dual-CRTC chips
			tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
			tmp |= (RADEON_SCLK_FORCE_CP | RADEON_SCLK_FORCE_E2);
			tmp |= RADEON_SCLK_FORCE_SE;

			// NOTE(review): num_crtc is always 2 in this branch, so
			// this if can never be taken - kept as-is to match the
			// X.org code this was derived from
			if ( di->num_crtc != 2 ) {
				tmp |= ( RADEON_SCLK_FORCE_RB    |
				RADEON_SCLK_FORCE_TDM   |
				RADEON_SCLK_FORCE_TAM   |
				RADEON_SCLK_FORCE_PB    |
				RADEON_SCLK_FORCE_RE    |
				RADEON_SCLK_FORCE_VIP   |
				RADEON_SCLK_FORCE_IDCT  |
				RADEON_SCLK_FORCE_TOP   |
				RADEON_SCLK_FORCE_DISP1 |
				RADEON_SCLK_FORCE_DISP2 |
				RADEON_SCLK_FORCE_HDP    );
			} else if ((asic == rt_r300) || (asic == rt_r350)) {
				tmp |= ( RADEON_SCLK_FORCE_HDP   |
					RADEON_SCLK_FORCE_DISP1 |
					RADEON_SCLK_FORCE_DISP2 |
					RADEON_SCLK_FORCE_TOP   |
					RADEON_SCLK_FORCE_IDCT  |
					RADEON_SCLK_FORCE_VIP);
			}
			Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);

			snooze(16000);

			if ((asic == rt_r300) || (asic == rt_r350)) {
				tmp = Radeon_INPLL(regs, asic, R300_SCLK_CNTL2);
				tmp |= ( R300_SCLK_FORCE_TCL |
					R300_SCLK_FORCE_GA  |
					R300_SCLK_FORCE_CBA);
				Radeon_OUTPLL(regs, asic, R300_SCLK_CNTL2, tmp);
				snooze(16000);
			}

			if (di->is_igp) {
				tmp = Radeon_INPLL(regs, asic, RADEON_MCLK_CNTL);
				tmp &= ~(RADEON_FORCEON_MCLKA |
					RADEON_FORCEON_YCLKA);
				Radeon_OUTPLL(regs, asic, RADEON_MCLK_CNTL, tmp);
				snooze(16000);
			}

			if ((asic == rt_rv200) ||
				(asic == rt_rv250) ||
				(asic == rt_rv280)) {
				tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_MORE_CNTL);
				tmp |= RADEON_SCLK_MORE_FORCEON;
				Radeon_OUTPLL(regs, asic, RADEON_SCLK_MORE_CNTL, tmp);
				snooze(16000);
			}

			tmp = Radeon_INPLL(regs, asic, RADEON_PIXCLKS_CNTL);
			tmp &= ~(RADEON_PIX2CLK_ALWAYS_ONb         |
				RADEON_PIX2CLK_DAC_ALWAYS_ONb     |
				RADEON_PIXCLK_BLEND_ALWAYS_ONb    |
				RADEON_PIXCLK_GV_ALWAYS_ONb       |
				RADEON_PIXCLK_DIG_TMDS_ALWAYS_ONb |
				RADEON_PIXCLK_LVDS_ALWAYS_ONb     |
				RADEON_PIXCLK_TMDS_ALWAYS_ONb);

			Radeon_OUTPLL(regs, asic, RADEON_PIXCLKS_CNTL, tmp);
			snooze(16000);

			tmp = Radeon_INPLL(regs, asic, RADEON_VCLK_ECP_CNTL);
			tmp &= ~(RADEON_PIXCLK_ALWAYS_ONb  |
				RADEON_PIXCLK_DAC_ALWAYS_ONb);
			Radeon_OUTPLL(regs, asic, RADEON_VCLK_ECP_CNTL, tmp);
		}
		SHOW_FLOW0( 3, "Dynamic Clock Scaling Disabled" );
		break;
	case 1:
		if ( di->num_crtc != 2 ) {
			// single-CRTC chips
			tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
			if ((INREG( regs, RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) > RADEON_CFG_ATI_REV_A13) {
				tmp &= ~(RADEON_SCLK_FORCE_CP | RADEON_SCLK_FORCE_RB);
			}
			tmp &= ~(RADEON_SCLK_FORCE_HDP  | RADEON_SCLK_FORCE_DISP1 |
				RADEON_SCLK_FORCE_TOP  | RADEON_SCLK_FORCE_SE   |
				RADEON_SCLK_FORCE_IDCT | RADEON_SCLK_FORCE_RE   |
				RADEON_SCLK_FORCE_PB   | RADEON_SCLK_FORCE_TAM  |
				RADEON_SCLK_FORCE_TDM);
			Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);
		} else if ((asic == rt_r300)
				|| (asic == rt_r350)
				|| (asic == rt_rv350)) {
			if (asic == rt_rv350) {
				// RV350: drop the force bits, request maximum
				// dynamic-stop latency instead
				tmp = Radeon_INPLL(regs, asic, R300_SCLK_CNTL2);
				tmp &= ~(R300_SCLK_FORCE_TCL |
					R300_SCLK_FORCE_GA  |
					R300_SCLK_FORCE_CBA);
				tmp |=  (R300_SCLK_TCL_MAX_DYN_STOP_LAT |
					R300_SCLK_GA_MAX_DYN_STOP_LAT  |
					R300_SCLK_CBA_MAX_DYN_STOP_LAT);
				Radeon_OUTPLL(regs, asic, R300_SCLK_CNTL2, tmp);

				tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
				tmp &= ~(RADEON_SCLK_FORCE_DISP2 | RADEON_SCLK_FORCE_CP      |
					RADEON_SCLK_FORCE_HDP   | RADEON_SCLK_FORCE_DISP1   |
					RADEON_SCLK_FORCE_TOP   | RADEON_SCLK_FORCE_E2      |
					R300_SCLK_FORCE_VAP     | RADEON_SCLK_FORCE_IDCT    |
					RADEON_SCLK_FORCE_VIP   | R300_SCLK_FORCE_SR        |
					R300_SCLK_FORCE_PX      | R300_SCLK_FORCE_TX        |
					R300_SCLK_FORCE_US      | RADEON_SCLK_FORCE_TV_SCLK |
					R300_SCLK_FORCE_SU      | RADEON_SCLK_FORCE_OV0);
					tmp |=  RADEON_DYN_STOP_LAT_MASK;
				Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);

				tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_MORE_CNTL);
				tmp &= ~RADEON_SCLK_MORE_FORCEON;
				tmp |=  RADEON_SCLK_MORE_MAX_DYN_STOP_LAT;
				Radeon_OUTPLL(regs, asic, RADEON_SCLK_MORE_CNTL, tmp);

				tmp = Radeon_INPLL(regs, asic, RADEON_VCLK_ECP_CNTL);
				tmp |= (RADEON_PIXCLK_ALWAYS_ONb |
					RADEON_PIXCLK_DAC_ALWAYS_ONb);
				Radeon_OUTPLL(regs, asic, RADEON_VCLK_ECP_CNTL, tmp);

				tmp = Radeon_INPLL(regs, asic, RADEON_PIXCLKS_CNTL);
				tmp |= (RADEON_PIX2CLK_ALWAYS_ONb         |
					RADEON_PIX2CLK_DAC_ALWAYS_ONb     |
					RADEON_DISP_TVOUT_PIXCLK_TV_ALWAYS_ONb |
					R300_DVOCLK_ALWAYS_ONb            |
					RADEON_PIXCLK_BLEND_ALWAYS_ONb    |
					RADEON_PIXCLK_GV_ALWAYS_ONb       |
					R300_PIXCLK_DVO_ALWAYS_ONb        |
					RADEON_PIXCLK_LVDS_ALWAYS_ONb     |
					RADEON_PIXCLK_TMDS_ALWAYS_ONb     |
					R300_PIXCLK_TRANS_ALWAYS_ONb      |
					R300_PIXCLK_TVO_ALWAYS_ONb        |
					R300_P2G2CLK_ALWAYS_ONb           |
					R300_P2G2CLK_DAC_ALWAYS_ONb);
				Radeon_OUTPLL(regs, asic, RADEON_PIXCLKS_CNTL, tmp);

				tmp = Radeon_INPLL(regs, asic, RADEON_MCLK_MISC);
				tmp |= (RADEON_MC_MCLK_DYN_ENABLE |
					RADEON_IO_MCLK_DYN_ENABLE);
				Radeon_OUTPLL(regs, asic, RADEON_MCLK_MISC, tmp);

				tmp = Radeon_INPLL(regs, asic, RADEON_MCLK_CNTL);
				tmp |= (RADEON_FORCEON_MCLKA |
					RADEON_FORCEON_MCLKB);

				tmp &= ~(RADEON_FORCEON_YCLKA  |
					RADEON_FORCEON_YCLKB  |
					RADEON_FORCEON_MC);

				/* Some releases of vbios have set DISABLE_MC_MCLKA
				and DISABLE_MC_MCLKB bits in the vbios table.  Setting these
				bits will cause H/W hang when reading video memory with dynamic clocking
				enabled. */
				if ((tmp & R300_DISABLE_MC_MCLKA) &&
				(tmp & R300_DISABLE_MC_MCLKB)) {
					/* If both bits are set, then check the active channels */
					tmp = Radeon_INPLL(regs, asic, RADEON_MCLK_CNTL);
					if (di->ram.width == 64) {
						// 64-bit memory interface: only one channel
						// is in use, re-enable just that one
						if (INREG( regs, RADEON_MEM_CNTL) & R300_MEM_USE_CD_CH_ONLY)
						tmp &= ~R300_DISABLE_MC_MCLKB;
						else
						tmp &= ~R300_DISABLE_MC_MCLKA;
					} else {
						tmp &= ~(R300_DISABLE_MC_MCLKA |
						R300_DISABLE_MC_MCLKB);
					}
				}

				Radeon_OUTPLL(regs, asic, RADEON_MCLK_CNTL, tmp);
			} else {
				// R300/R350
				tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
				tmp &= ~(R300_SCLK_FORCE_VAP);
				tmp |= RADEON_SCLK_FORCE_CP;
				Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);
				snooze(15000);

				tmp = Radeon_INPLL(regs, asic, R300_SCLK_CNTL2);
				tmp &= ~(R300_SCLK_FORCE_TCL |
				R300_SCLK_FORCE_GA  |
				R300_SCLK_FORCE_CBA);
				Radeon_OUTPLL(regs, asic, R300_SCLK_CNTL2, tmp);
			}
		} else {
			// remaining dual-CRTC chips (R100-R200 family)
			tmp = Radeon_INPLL(regs, asic, RADEON_CLK_PWRMGT_CNTL);

			tmp &= ~(RADEON_ACTIVE_HILO_LAT_MASK     |
				RADEON_DISP_DYN_STOP_LAT_MASK   |
				RADEON_DYN_STOP_MODE_MASK);

			tmp |= (RADEON_ENGIN_DYNCLK_MODE |
			(0x01 << RADEON_ACTIVE_HILO_LAT_SHIFT));
			Radeon_OUTPLL(regs, asic, RADEON_CLK_PWRMGT_CNTL, tmp);
			snooze(15000);

			tmp = Radeon_INPLL(regs, asic, RADEON_CLK_PIN_CNTL);
			tmp |= RADEON_SCLK_DYN_START_CNTL;
			Radeon_OUTPLL(regs, asic, RADEON_CLK_PIN_CNTL, tmp);
			snooze(15000);

			/* When DRI is enabled, setting DYN_STOP_LAT to zero can cause some R200
			to lockup randomly, leave them as set by BIOS.
			*/
			tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_CNTL);
			/*tmp &= RADEON_SCLK_SRC_SEL_MASK;*/
			tmp &= ~RADEON_SCLK_FORCEON_MASK;

			/*RAGE_6::A11 A12 A12N1 A13, RV250::A11 A12, R300*/
			if (((asic == rt_rv250) &&
				((INREG( regs, RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) <
				  RADEON_CFG_ATI_REV_A13)) ||
				((asic == rt_rv100) &&
				((INREG( regs, RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) <=
				  RADEON_CFG_ATI_REV_A13)))
			{
				// early revisions: keep CP and VIP clocks forced on
				tmp |= RADEON_SCLK_FORCE_CP;
				tmp |= RADEON_SCLK_FORCE_VIP;
			}

			Radeon_OUTPLL(regs, asic, RADEON_SCLK_CNTL, tmp);

			if ((asic == rt_rv200) ||
				(asic == rt_rv250) ||
				(asic == rt_rv280)) {
				tmp = Radeon_INPLL(regs, asic, RADEON_SCLK_MORE_CNTL);
				tmp &= ~RADEON_SCLK_MORE_FORCEON;

				/* RV200::A11 A12 RV250::A11 A12 */
				if (((asic == rt_rv200) ||
					 (asic == rt_rv250)) &&
					((INREG( regs, RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) <
					  RADEON_CFG_ATI_REV_A13))
				{
					tmp |= RADEON_SCLK_MORE_FORCEON;
				}
				Radeon_OUTPLL(regs, asic, RADEON_SCLK_MORE_CNTL, tmp);
				snooze(15000);
			}

			/* RV200::A11 A12, RV250::A11 A12 */
			if (((asic == rt_rv200) ||
				 (asic == rt_rv250)) &&
				((INREG( regs, RADEON_CONFIG_CNTL) & RADEON_CFG_ATI_REV_ID_MASK) <
				  RADEON_CFG_ATI_REV_A13))
			{
				tmp = Radeon_INPLL(regs, asic, RADEON_PLL_PWRMGT_CNTL);
				tmp |= RADEON_TCL_BYPASS_DISABLE;
				Radeon_OUTPLL(regs, asic, RADEON_PLL_PWRMGT_CNTL, tmp);
			}
			snooze(15000);

			/*enable dynamic mode for display clocks (PIXCLK and PIX2CLK)*/
			tmp = Radeon_INPLL(regs, asic, RADEON_PIXCLKS_CNTL);
			tmp |=  (RADEON_PIX2CLK_ALWAYS_ONb         |
				RADEON_PIX2CLK_DAC_ALWAYS_ONb     |
				RADEON_PIXCLK_BLEND_ALWAYS_ONb    |
				RADEON_PIXCLK_GV_ALWAYS_ONb       |
				RADEON_PIXCLK_DIG_TMDS_ALWAYS_ONb |
				RADEON_PIXCLK_LVDS_ALWAYS_ONb     |
				RADEON_PIXCLK_TMDS_ALWAYS_ONb);

			Radeon_OUTPLL(regs, asic, RADEON_PIXCLKS_CNTL, tmp);
			snooze(15000);

			tmp = Radeon_INPLL(regs, asic, RADEON_VCLK_ECP_CNTL);
			tmp |= (RADEON_PIXCLK_ALWAYS_ONb  |
				RADEON_PIXCLK_DAC_ALWAYS_ONb);

			Radeon_OUTPLL(regs, asic, RADEON_VCLK_ECP_CNTL, tmp);
			snooze(15000);
		}
		SHOW_FLOW0( 3, "Dynamic Clock Scaling Enabled" );
		break;
	default:
		break;
	}
}
988