xref: /haiku/src/add-ons/accelerants/radeon/EngineManagment.c (revision 67bce78b48ed6d01b5a8eef89f5694c372b7e0a1)
1 /*
2 	Copyright (c) 2002, Thomas Kurschel
3 
4 
5 	Part of Radeon accelerant
6 
7 	Hardware accelerator management
8 
9 	All accelerator commands go through the following steps:
10 	- accelerant adds command to CP buffer and updates CP write pointer
11 	- CP fetches command and sends it to MicroController
12 	- MicroController instructs 2D unit to execute command
13 	- 2D unit draws into 2D Destination Cache (DC)
14 	- 2D Destination Cache is drained to frame buffer
15 
16 	Whenever a token is required by BeOS, a command is queued to write
17 	the timestamp into Scratch Register 0. I haven't fully understood
18 	when and how coherency is assured by Radeon, so I assume the following:
19 	- when the timestamp is written, all previous commands have been issued,
20 	  i.e. they are read and executed by the microcontroller
21 	- to make sure previously issued 2D commands have been finished,
22 	  a WAIT_2D_IDLECLEAN command is inserted before the scratch register
23 	  write
24 	- to flush the destination cache, a RB2D_DC_FLUSH_ALL command is
25 	  issued before the wait; I hope that the wait command also waits for
26 	  the flush command, but I'm not sure about that
27 
28 	What remains is the cache coherency problem. You can set various bits in
29 	the DSTCACHE_MODE register to assure that, but first I don't really understand
30 	them, and second I'm not sure which other caches/FIFOs may cause trouble.
31 	Especially, Be wants to use CPU and CP accesses in parallel. Hopefully,
32 	they don't interfere.
33 
34 	I know that the PAINT_MULTI command causes trouble if you change the
35 	ROP to something else: CPU writes produce garbage in frame buffer for the
36 	next couple of accesses. Resetting the ROP to a simple copy helps, but
37 	I'm not sure what happens with concurrent CPU accesses to other areas
38 	of the frame buffer.
39 */
40 
41 
42 #include "radeon_accelerant.h"
43 #include "generic.h"
44 #include "cp_regs.h"
45 #include "rbbm_regs.h"
46 #include "GlobalData.h"
47 #include "mmio.h"
48 
49 static engine_token radeon_engine_token = { 1, B_2D_ACCELERATION, NULL };
50 
51 // public function: return number of hardware engine
52 uint32 ACCELERANT_ENGINE_COUNT(void)
53 {
54 	// hm, is there *any* card sporting more then
55 	// one hardware accelerator???
56 	return 1;
57 }
58 
59 // write current token into CP stream
60 static void writeSyncToken( accelerator_info *ai )
61 {
62 	uint32 buffer[6];
63 	uint idx = 0;
64 
65 	// don't write token if it hasn't changed since last write
66 	if( ai->si->engine.count == ai->si->engine.written )
67 		return;
68 
69 	// flush pending data
70 	buffer[idx++] = CP_PACKET0( RADEON_RB2D_DSTCACHE_CTLSTAT, 0 );
71 	buffer[idx++] = RADEON_RB2D_DC_FLUSH_ALL;
72 
73 	// make sure commands are finished
74 	buffer[idx++] = CP_PACKET0( RADEON_WAIT_UNTIL, 0 );
75 	buffer[idx++] = RADEON_WAIT_2D_IDLECLEAN |
76 		RADEON_WAIT_3D_IDLECLEAN | RADEON_WAIT_HOST_IDLECLEAN;
77 
78 	// write scratch register
79 	buffer[idx++] = CP_PACKET0( RADEON_SCRATCH_REG0, 0 );
80 	buffer[idx++] = ai->si->engine.count;
81 
82 	ai->si->engine.written = ai->si->engine.count;
83 
84 	Radeon_SendCP( ai, buffer, idx );
85 }
86 
87 // public function: acquire engine for future use
88 //	capabilites - required 2D/3D capabilities of engine, ignored
89 //	max_wait - maximum time we want to wait (in ms?), ignored
90 //	st - when engine has been acquired, wait for this sync token
91 //	et - (out) specifier of the engine acquired
92 status_t ACQUIRE_ENGINE( uint32 capabilities, uint32 max_wait,
93 	sync_token *st, engine_token **et )
94 {
95 	virtual_card *vc = ai->vc;
96 	shared_info *si = ai->si;
97 
98 	SHOW_FLOW0( 4, "" );
99 
100 	ACQUIRE_BEN( si->engine.lock)
101 
102 	if( si->active_vc != vc->id )
103 		Radeon_ActivateVirtualCard( ai );
104 
105 	// wait for sync
106 	if (st)
107 		SYNC_TO_TOKEN( st );
108 
109 	*et = &radeon_engine_token;
110 	return B_OK;
111 }
112 
113 // public function: release accelerator
114 //	et - engine to release
115 //	st - (out) sync token to be filled out
116 status_t RELEASE_ENGINE( engine_token *et, sync_token *st )
117 {
118 	shared_info *si = ai->si;
119 
120 	SHOW_FLOW0( 4, "" );
121 
122 	// fill out sync token
123 	if (st) {
124 		writeSyncToken( ai );
125 
126 		st->engine_id = et->engine_id;
127 		st->counter = si->engine.count;
128 	}
129 
130 	RELEASE_BEN( ai->si->engine.lock )
131 
132 	return B_OK;
133 }
134 
135 // public function: wait until engine is idle
136 // ??? which engine to wait for? Is there anyone using this function?
137 void WAIT_ENGINE_IDLE(void)
138 {
139 	SHOW_FLOW0( 4, "" );
140 
141 	Radeon_Finish( ai );
142 }
143 
144 // public function: get sync token
145 //	et - engine to wait for
146 //	st - (out) sync token to be filled out
147 status_t GET_SYNC_TOKEN( engine_token *et, sync_token *st )
148 {
149 	shared_info *si = ai->si;
150 
151 	SHOW_FLOW0( 4, "" );
152 
153 	writeSyncToken( ai );
154 
155 	st->engine_id = et->engine_id;
156 	st->counter = si->engine.count;
157 
158 	SHOW_FLOW( 4, "got counter=%d", si->engine.count );
159 
160 	return B_OK;
161 }
162 
163 // this is the same as the corresponding kernel function
164 static void spin( uint32 delay )
165 {
166 	bigtime_t start_time;
167 
168 	start_time = system_time();
169 
170 	while( system_time() - start_time < delay )
171 		;
172 }
173 
174 // public: sync to token
175 //	st - token to wait for
176 status_t SYNC_TO_TOKEN( sync_token *st )
177 {
178 	shared_info *si = ai->si;
179 	bigtime_t start_time, sample_time;
180 //	status_t result;
181 
182 	SHOW_FLOW0( 4, "" );
183 
184 	start_time = system_time();
185 
186 	while( 1 ) {
187 		SHOW_FLOW( 4, "passed counter=%d", *si->scratch_ptr );
188 
189 		// a bit nasty: counter is 64 bit, but we have 32 bit only,
190 		// this is a tricky calculation to handle wrap-arounds correctly
191 		/*if( (int32)(*si->scratch_ptr - st->counter) >= 0 )
192 			return B_OK;*/
193 		if( (int32)(INREG( ai->regs, RADEON_SCRATCH_REG0 ) - st->counter) >= 0 )
194 			return B_OK;
195 
196 		sample_time = system_time();
197 
198 		if( sample_time - start_time > 100000 )
199 			break;
200 
201 		// use exponential fall-off
202 		// in the beginning do busy-waiting, later on we let thread sleep
203 		// the micro-spin is used to reduce PCI load
204 		if( sample_time - start_time > 5000 )
205 			snooze( (sample_time - start_time) / 10 );
206 		else
207 			spin( 1 );
208 	}
209 
210 	// we could reset engine now, but caller doesn't need to acquire
211 	// engine before calling this function, so we either reset it
212 	// without sync (ouch!) or acquire engine first and risk deadlocking
213 	SHOW_ERROR( 0, "Failed waiting for token %d (active token: %d)",
214 		st->counter, INREG( ai->regs, RADEON_SCRATCH_REG0 )/**si->scratch_ptr*/ );
215 
216 	return B_ERROR;
217 }
218