/*
	Copyright (c) 2002, Thomas Kurschel


	Part of Radeon accelerant

	Hardware accelerator management

	All accelerator commands go through the following steps:
	- accelerant adds command to CP buffer and updates CP write pointer
	- CP fetches command and sends it to MicroController
	- MicroController instructs 2D unit to execute command
	- 2D unit draws into 2D Destination Cache (DC)
	- 2D Destination Cache is drained to frame buffer

	Whenever a token is required by BeOS, a command is queued to write
	the timestamp into Scratch Register 0. I haven't fully understood
	when and how coherency is assured by Radeon, so I assume the following:
	- when the timestamp is written, all previous commands have been issued,
	  i.e. they are read and executed by the microcontroller
	- to make sure previously issued 2D commands have been finished,
	  a WAIT_2D_IDLECLEAN command is inserted before the scratch register
	  write
	- to flush the destination cache, a RB2D_DC_FLUSH_ALL command is
	  issued before the wait; I hope that the wait command also waits for
	  the flush command, but I'm not sure about that

	That leaves the cache coherency problem. You can set various bits in the
	DSTCACHE_MODE register to assure that, but first I don't really understand
	them, and second I'm not sure which other caches/FIFOs may make trouble.
	Especially, Be wants to use CPU and CP accesses in parallel. Hopefully,
	they don't interfere.

	I know that the PAINT_MULTI command makes trouble if you change the
	ROP to something else: CPU writes produce garbage in frame buffer for the
	next couple of accesses. Resetting the ROP to a simple copy helps, but
	I'm not sure what happens with concurrent CPU accesses to other areas
	of the frame buffer.
39 */ 40 41 42 #include "radeon_accelerant.h" 43 #include "generic.h" 44 #include "rbbm_regs.h" 45 #include "GlobalData.h" 46 #include "mmio.h" 47 #include "CP.h" 48 49 static engine_token radeon_engine_token = { 1, B_2D_ACCELERATION, NULL }; 50 51 // public function: return number of hardware engine 52 uint32 ACCELERANT_ENGINE_COUNT(void) 53 { 54 // hm, is there *any* card sporting more then 55 // one hardware accelerator??? 56 return 1; 57 } 58 59 // write current sync token into CP stream; 60 // we instruct the CP to flush all kind of cache first to not interfere 61 // with subsequent host writes 62 static void writeSyncToken( accelerator_info *ai ) 63 { 64 // don't write token if it hasn't changed since last write 65 if( ai->si->engine.count == ai->si->engine.written ) 66 return; 67 68 START_IB(); 69 70 // flush pending data 71 WRITE_IB_REG( RADEON_RB2D_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL ); 72 73 // make sure commands are finished 74 WRITE_IB_REG( RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | 75 RADEON_WAIT_3D_IDLECLEAN | RADEON_WAIT_HOST_IDLECLEAN ); 76 77 // write scratch register 78 WRITE_IB_REG( RADEON_SCRATCH_REG0, ai->si->engine.count ); 79 80 ai->si->engine.written = ai->si->engine.count; 81 82 SUBMIT_IB(); 83 } 84 85 // public function: acquire engine for future use 86 // capabilites - required 2D/3D capabilities of engine, ignored 87 // max_wait - maximum time we want to wait (in ms?), ignored 88 // st - when engine has been acquired, wait for this sync token 89 // et - (out) specifier of the engine acquired 90 status_t ACQUIRE_ENGINE( uint32 capabilities, uint32 max_wait, 91 sync_token *st, engine_token **et ) 92 { 93 shared_info *si = ai->si; 94 95 SHOW_FLOW0( 4, "" ); 96 97 (void)capabilities; 98 (void)max_wait; 99 100 ACQUIRE_BEN( si->engine.lock) 101 102 // wait for sync 103 if (st) 104 SYNC_TO_TOKEN( st ); 105 106 *et = &radeon_engine_token; 107 return B_OK; 108 } 109 110 // public function: release accelerator 111 // et - engine to 
release 112 // st - (out) sync token to be filled out 113 status_t RELEASE_ENGINE( engine_token *et, sync_token *st ) 114 { 115 shared_info *si = ai->si; 116 117 SHOW_FLOW0( 4, "" ); 118 119 // fill out sync token 120 if (st) { 121 writeSyncToken( ai ); 122 123 st->engine_id = et->engine_id; 124 st->counter = si->engine.count; 125 } 126 127 RELEASE_BEN( ai->si->engine.lock ) 128 129 return B_OK; 130 } 131 132 // public function: wait until engine is idle 133 // ??? which engine to wait for? Is there anyone using this function? 134 // is lock hold? 135 void WAIT_ENGINE_IDLE(void) 136 { 137 SHOW_FLOW0( 4, "" ); 138 139 Radeon_WaitForIdle( ai, false ); 140 } 141 142 // public function: get sync token 143 // et - engine to wait for 144 // st - (out) sync token to be filled out 145 status_t GET_SYNC_TOKEN( engine_token *et, sync_token *st ) 146 { 147 shared_info *si = ai->si; 148 149 SHOW_FLOW0( 4, "" ); 150 151 writeSyncToken( ai ); 152 153 st->engine_id = et->engine_id; 154 st->counter = si->engine.count; 155 156 SHOW_FLOW( 4, "got counter=%d", si->engine.count ); 157 158 return B_OK; 159 } 160 161 // this is the same as the corresponding kernel function 162 void Radeon_Spin( uint32 delay ) 163 { 164 bigtime_t start_time; 165 166 start_time = system_time(); 167 168 while( system_time() - start_time < delay ) 169 ; 170 } 171 172 // public: sync to token 173 // st - token to wait for 174 status_t SYNC_TO_TOKEN( sync_token *st ) 175 { 176 shared_info *si = ai->si; 177 bigtime_t start_time, sample_time; 178 179 SHOW_FLOW0( 4, "" ); 180 181 start_time = system_time(); 182 183 while( 1 ) { 184 SHOW_FLOW( 4, "passed counter=%d", 185 ((uint32 *)(ai->mapped_memory[si->cp.feedback.mem_type].data + si->cp.feedback.scratch_mem_offset))[0] ); 186 //si->cp.scratch.ptr[0] ); 187 188 // a bit nasty: counter is 64 bit, but we have 32 bit only, 189 // this is a tricky calculation to handle wrap-arounds correctly 190 if( (int32)( 191 ((uint32 
*)(ai->mapped_memory[si->cp.feedback.mem_type].data + si->cp.feedback.scratch_mem_offset))[0] 192 //si->cp.scratch.ptr[0] 193 - st->counter) >= 0 ) 194 return B_OK; 195 /*if( (int32)(INREG( ai->regs, RADEON_SCRATCH_REG0 ) - st->counter) >= 0 ) 196 return B_OK;*/ 197 198 // commands have not been finished; 199 // this is a good time to free completed buffers as we have to 200 // busy-wait anyway 201 ACQUIRE_BEN( si->cp.lock ); 202 Radeon_FreeIndirectBuffers( ai ); 203 RELEASE_BEN( si->cp.lock ); 204 205 sample_time = system_time(); 206 207 if( sample_time - start_time > 100000 ) 208 break; 209 210 // use exponential fall-off 211 // in the beginning do busy-waiting, later on we let thread sleep 212 // the micro-spin is used to reduce PCI load 213 if( sample_time - start_time > 5000 ) 214 snooze( (sample_time - start_time) / 10 ); 215 else 216 Radeon_Spin( 1 ); 217 } 218 219 // we could reset engine now, but caller doesn't need to acquire 220 // engine before calling this function, so we either reset it 221 // without sync (ouch!) or acquire engine first and risk deadlocking 222 SHOW_ERROR( 0, "Failed waiting for token %d (active token: %d)", 223 st->counter, /*INREG( ai->regs, RADEON_SCRATCH_REG0 )*/ 224 ((uint32 *)(ai->mapped_memory[si->cp.feedback.mem_type].data + si->cp.feedback.scratch_mem_offset))[0] ); 225 //si->cp.scratch.ptr[0] ); 226 227 Radeon_ResetEngine( ai ); 228 229 return B_ERROR; 230 } 231