xref: /haiku/src/add-ons/accelerants/radeon/EngineManagment.c (revision 24159a0c7d6d6dcba9f2a0c1a7c08d2c8167f21b)
1 /*
2 	Copyright (c) 2002, Thomas Kurschel
3 
4 
5 	Part of Radeon accelerant
6 
7 	Hardware accelerator management
8 
9 	All accelerator commands go through the following steps:
10 	- accelerant adds command to CP buffer and updates CP write pointer
11 	- CP fetches command and sends it to MicroController
12 	- MicroController instructs 2D unit to execute command
13 	- 2D unit draws into 2D Destination Cache (DC)
14 	- 2D Destination Cache is drained to frame buffer
15 
16 	Whenever a token is required by BeOS, a command is queued to write
	the timestamp into Scratch Register 0. I haven't fully understood
	when and how coherency is ensured by Radeon, so I assume the following:
19 	- when the timestamp is written, all previous commands have been issued,
20 	  i.e. they are read and executed by the microcontroller
21 	- to make sure previously issued 2D commands have been finished,
22 	  a WAIT_2D_IDLECLEAN command is inserted before the scratch register
23 	  write
24 	- to flush the destination cache, a RB2D_DC_FLUSH_ALL command is
25 	  issued before the wait; I hope that the wait command also waits for
26 	  the flush command, but I'm not sure about that
27 
	The cache coherency problem remains. You can set various bits in the
	DSTCACHE_MODE register to ensure that, but first I don't really understand
30 	them, and second I'm not sure which other caches/FIFO may make trouble.
31 	Especially, Be wants to use CPU and CP accesses in parallel. Hopefully,
32 	they don't interfere.
33 
	I know that the PAINT_MULTI command makes trouble if you change the
	ROP to something else: CPU writes produce garbage in the frame buffer for
	the next couple of accesses. Resetting the ROP to a simple copy helps, but
37 	I'm not sure what happens with concurrent CPU accesses to other areas
38 	of the frame buffer.
39 */
40 
41 
42 #include "radeon_accelerant.h"
43 #include "generic.h"
44 #include "rbbm_regs.h"
45 #include "GlobalData.h"
46 #include "mmio.h"
47 #include "CP.h"
48 
49 static engine_token radeon_engine_token = { 1, B_2D_ACCELERATION, NULL };
50 
51 // public function: return number of hardware engine
52 uint32 ACCELERANT_ENGINE_COUNT(void)
53 {
54 	// hm, is there *any* card sporting more then
55 	// one hardware accelerator???
56 	return 1;
57 }
58 
59 // write current sync token into CP stream;
60 // we instruct the CP to flush all kind of cache first to not interfere
61 // with subsequent host writes
62 static void writeSyncToken( accelerator_info *ai )
63 {
64 	// don't write token if it hasn't changed since last write
65 	if( ai->si->engine.count == ai->si->engine.written )
66 		return;
67 
68 	START_IB();
69 
70 	// flush pending data
71 	WRITE_IB_REG( RADEON_RB2D_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL );
72 
73 	// make sure commands are finished
74 	WRITE_IB_REG( RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN |
75 		RADEON_WAIT_3D_IDLECLEAN | RADEON_WAIT_HOST_IDLECLEAN );
76 
77 	// write scratch register
78 	WRITE_IB_REG( RADEON_SCRATCH_REG0, ai->si->engine.count );
79 
80 	ai->si->engine.written = ai->si->engine.count;
81 
82 	SUBMIT_IB();
83 }
84 
85 // public function: acquire engine for future use
86 //	capabilites - required 2D/3D capabilities of engine, ignored
87 //	max_wait - maximum time we want to wait (in ms?), ignored
88 //	st - when engine has been acquired, wait for this sync token
89 //	et - (out) specifier of the engine acquired
90 status_t ACQUIRE_ENGINE( uint32 capabilities, uint32 max_wait,
91 	sync_token *st, engine_token **et )
92 {
93 	shared_info *si = ai->si;
94 
95 	SHOW_FLOW0( 4, "" );
96 
97 	(void)capabilities;
98 	(void)max_wait;
99 
100 	ACQUIRE_BEN( si->engine.lock)
101 
102 	// wait for sync
103 	if (st)
104 		SYNC_TO_TOKEN( st );
105 
106 	*et = &radeon_engine_token;
107 	return B_OK;
108 }
109 
110 // public function: release accelerator
111 //	et - engine to release
112 //	st - (out) sync token to be filled out
113 status_t RELEASE_ENGINE( engine_token *et, sync_token *st )
114 {
115 	shared_info *si = ai->si;
116 
117 	SHOW_FLOW0( 4, "" );
118 
119 	// fill out sync token
120 	if (st) {
121 		writeSyncToken( ai );
122 
123 		st->engine_id = et->engine_id;
124 		st->counter = si->engine.count;
125 	}
126 
127 	RELEASE_BEN( ai->si->engine.lock )
128 
129 	return B_OK;
130 }
131 
// public function: wait until engine is idle
// ??? which engine to wait for? Is there anyone using this function?
//     is lock hold?
void WAIT_ENGINE_IDLE(void)
{
	SHOW_FLOW0( 4, "" );

	// NOTE(review): second argument presumably controls whether the CP lock
	// is held while waiting — confirm against Radeon_WaitForIdle()'s definition
	Radeon_WaitForIdle( ai, false );
}
141 
142 // public function: get sync token
143 //	et - engine to wait for
144 //	st - (out) sync token to be filled out
145 status_t GET_SYNC_TOKEN( engine_token *et, sync_token *st )
146 {
147 	shared_info *si = ai->si;
148 
149 	SHOW_FLOW0( 4, "" );
150 
151 	writeSyncToken( ai );
152 
153 	st->engine_id = et->engine_id;
154 	st->counter = si->engine.count;
155 
156 	SHOW_FLOW( 4, "got counter=%d", si->engine.count );
157 
158 	return B_OK;
159 }
160 
161 // this is the same as the corresponding kernel function
162 void Radeon_Spin( uint32 delay )
163 {
164 	bigtime_t start_time;
165 
166 	start_time = system_time();
167 
168 	while( system_time() - start_time < delay )
169 		;
170 }
171 
172 // public: sync to token
173 //	st - token to wait for
174 status_t SYNC_TO_TOKEN( sync_token *st )
175 {
176 	shared_info *si = ai->si;
177 	bigtime_t start_time, sample_time;
178 
179 	SHOW_FLOW0( 4, "" );
180 
181 	start_time = system_time();
182 
183 	while( 1 ) {
184 		SHOW_FLOW( 4, "passed counter=%d",
185 			((uint32 *)(ai->mapped_memory[si->cp.feedback.mem_type].data + si->cp.feedback.scratch_mem_offset))[0] );
186 			//si->cp.scratch.ptr[0] );
187 
188 		// a bit nasty: counter is 64 bit, but we have 32 bit only,
189 		// this is a tricky calculation to handle wrap-arounds correctly
190 		if( (int32)(
191 			((uint32 *)(ai->mapped_memory[si->cp.feedback.mem_type].data + si->cp.feedback.scratch_mem_offset))[0]
192 			//si->cp.scratch.ptr[0]
193 			- st->counter) >= 0 )
194 			return B_OK;
195 		/*if( (int32)(INREG( ai->regs, RADEON_SCRATCH_REG0 ) - st->counter) >= 0 )
196 			return B_OK;*/
197 
198 		// commands have not been finished;
199 		// this is a good time to free completed buffers as we have to
200 		// busy-wait anyway
201 		ACQUIRE_BEN( si->cp.lock );
202 		Radeon_FreeIndirectBuffers( ai );
203 		RELEASE_BEN( si->cp.lock );
204 
205 		sample_time = system_time();
206 
207 		if( sample_time - start_time > 100000 )
208 			break;
209 
210 		// use exponential fall-off
211 		// in the beginning do busy-waiting, later on we let thread sleep
212 		// the micro-spin is used to reduce PCI load
213 		if( sample_time - start_time > 5000 )
214 			snooze( (sample_time - start_time) / 10 );
215 		else
216 			Radeon_Spin( 1 );
217 	}
218 
219 	// we could reset engine now, but caller doesn't need to acquire
220 	// engine before calling this function, so we either reset it
221 	// without sync (ouch!) or acquire engine first and risk deadlocking
222 	SHOW_ERROR( 0, "Failed waiting for token %d (active token: %d)",
223 		st->counter, /*INREG( ai->regs, RADEON_SCRATCH_REG0 )*/
224 		((uint32 *)(ai->mapped_memory[si->cp.feedback.mem_type].data + si->cp.feedback.scratch_mem_offset))[0] );
225 		//si->cp.scratch.ptr[0] );
226 
227 	Radeon_ResetEngine( ai );
228 
229 	return B_ERROR;
230 }
231