1 /* 2 Copyright (c) 2002, Thomas Kurschel 3 4 5 Part of Radeon accelerant 6 7 Hardware accelerator management 8 9 All accelerator commands go through the following steps: 10 - accelerant adds command to CP buffer and updates CP write pointer 11 - CP fetches command and sends it to MicroController 12 - MicroController instructs 2D unit to execute command 13 - 2D unit draws into 2D Destination Cache (DC) 14 - 2D Destination Cache is drained to frame buffer 15 16 Whenever a token is required by BeOS, a command is queued to write 17 the timestamp into Scratch Register 0. I haven't fully understand 18 when and how coherancy is assured by Radeon, so I assume the following: 19 - when the timestamp is written, all previous commands have been issued, 20 i.e. they are read and executed by the microcontroller 21 - to make sure previously issued 2D commands have been finished, 22 a WAIT_2D_IDLECLEAN command is inserted before the scratch register 23 write 24 - to flush the destination cache, a RB2D_DC_FLUSH_ALL command is 25 issued before the wait; I hope that the wait command also waits for 26 the flush command, but I'm not sure about that 27 28 Remains the cache coherency problem. It you can set various bits in 29 DSTCACHE_MODE register to assure that, but first I don't really understand 30 them, and second I'm not sure which other caches/FIFO may make trouble. 31 Especially, Be wants to use CPU and CP accesses in parallel. Hopefully, 32 they don't interfere. 33 34 I know that the PAINT_MULTI commands makes trouble if you change the 35 ROP to something else: CPU writes produce garbage in frame buffer for the 36 next couple of accesses. Resetting the ROP to a simply copy helps, but 37 I'm not sure what happens with concurrent CPU accesses to other areas 38 of the frame buffer. 39 */ 40 41 42 #include "radeon_accelerant.h" 43 #include "generic.h" 44 #include "cp_regs.h" 45 #include "rbbm_regs.h" 46 #include "GlobalData.h" 47 #include "mmio.h" 48 49 static engine_token radeon_engine_token = { 1, B_2D_ACCELERATION, NULL }; 50 51 // public function: return number of hardware engine 52 uint32 ACCELERANT_ENGINE_COUNT(void) 53 { 54 // hm, is there *any* card sporting more then 55 // one hardware accelerator??? 56 return 1; 57 } 58 59 // write current token into CP stream 60 static void writeSyncToken( accelerator_info *ai ) 61 { 62 uint32 buffer[6]; 63 uint idx = 0; 64 65 // don't write token if it hasn't changed since last write 66 if( ai->si->engine.count == ai->si->engine.written ) 67 return; 68 69 // flush pending data 70 buffer[idx++] = CP_PACKET0( RADEON_RB2D_DSTCACHE_CTLSTAT, 0 ); 71 buffer[idx++] = RADEON_RB2D_DC_FLUSH_ALL; 72 73 // make sure commands are finished 74 buffer[idx++] = CP_PACKET0( RADEON_WAIT_UNTIL, 0 ); 75 buffer[idx++] = RADEON_WAIT_2D_IDLECLEAN | 76 RADEON_WAIT_3D_IDLECLEAN | RADEON_WAIT_HOST_IDLECLEAN; 77 78 // write scratch register 79 buffer[idx++] = CP_PACKET0( RADEON_SCRATCH_REG0, 0 ); 80 buffer[idx++] = ai->si->engine.count; 81 82 ai->si->engine.written = ai->si->engine.count; 83 84 Radeon_SendCP( ai, buffer, idx ); 85 } 86 87 // public function: acquire engine for future use 88 // capabilites - required 2D/3D capabilities of engine, ignored 89 // max_wait - maximum time we want to wait (in ms?), ignored 90 // st - when engine has been acquired, wait for this sync token 91 // et - (out) specifier of the engine acquired 92 status_t ACQUIRE_ENGINE( uint32 capabilities, uint32 max_wait, 93 sync_token *st, engine_token **et ) 94 { 95 virtual_card *vc = ai->vc; 96 shared_info *si = ai->si; 97 98 SHOW_FLOW0( 4, "" ); 99 100 ACQUIRE_BEN( si->engine.lock) 101 102 if( si->active_vc != vc->id ) 103 Radeon_ActivateVirtualCard( ai ); 104 105 // wait for sync 106 if (st) 107 SYNC_TO_TOKEN( st ); 108 109 *et = &radeon_engine_token; 110 return B_OK; 111 } 112 113 // public function: release accelerator 114 // et - engine to release 115 // st - (out) sync token to be filled out 116 status_t RELEASE_ENGINE( engine_token *et, sync_token *st ) 117 { 118 shared_info *si = ai->si; 119 120 SHOW_FLOW0( 4, "" ); 121 122 // fill out sync token 123 if (st) { 124 writeSyncToken( ai ); 125 126 st->engine_id = et->engine_id; 127 st->counter = si->engine.count; 128 } 129 130 RELEASE_BEN( ai->si->engine.lock ) 131 132 return B_OK; 133 } 134 135 // public function: wait until engine is idle 136 // ??? which engine to wait for? Is there anyone using this function? 137 void WAIT_ENGINE_IDLE(void) 138 { 139 SHOW_FLOW0( 4, "" ); 140 141 Radeon_Finish( ai ); 142 } 143 144 // public function: get sync token 145 // et - engine to wait for 146 // st - (out) sync token to be filled out 147 status_t GET_SYNC_TOKEN( engine_token *et, sync_token *st ) 148 { 149 shared_info *si = ai->si; 150 151 SHOW_FLOW0( 4, "" ); 152 153 writeSyncToken( ai ); 154 155 st->engine_id = et->engine_id; 156 st->counter = si->engine.count; 157 158 SHOW_FLOW( 4, "got counter=%d", si->engine.count ); 159 160 return B_OK; 161 } 162 163 // this is the same as the corresponding kernel function 164 static void spin( uint32 delay ) 165 { 166 bigtime_t start_time; 167 168 start_time = system_time(); 169 170 while( system_time() - start_time < delay ) 171 ; 172 } 173 174 // public: sync to token 175 // st - token to wait for 176 status_t SYNC_TO_TOKEN( sync_token *st ) 177 { 178 shared_info *si = ai->si; 179 bigtime_t start_time, sample_time; 180 // status_t result; 181 182 SHOW_FLOW0( 4, "" ); 183 184 start_time = system_time(); 185 186 while( 1 ) { 187 SHOW_FLOW( 4, "passed counter=%d", *si->scratch_ptr ); 188 189 // a bit nasty: counter is 64 bit, but we have 32 bit only, 190 // this is a tricky calculation to handle wrap-arounds correctly 191 /*if( (int32)(*si->scratch_ptr - st->counter) >= 0 ) 192 return B_OK;*/ 193 if( (int32)(INREG( ai->regs, RADEON_SCRATCH_REG0 ) - st->counter) >= 0 ) 194 return B_OK; 195 196 sample_time = system_time(); 197 198 if( sample_time - start_time > 100000 ) 199 break; 200 201 // use exponential fall-off 202 // in the beginning do busy-waiting, later on we let thread sleep 203 // the micro-spin is used to reduce PCI load 204 if( sample_time - start_time > 5000 ) 205 snooze( (sample_time - start_time) / 10 ); 206 else 207 spin( 1 ); 208 } 209 210 // we could reset engine now, but caller doesn't need to acquire 211 // engine before calling this function, so we either reset it 212 // without sync (ouch!) or acquire engine first and risk deadlocking 213 SHOW_ERROR( 0, "Failed waiting for token %d (active token: %d)", 214 st->counter, INREG( ai->regs, RADEON_SCRATCH_REG0 )/**si->scratch_ptr*/ ); 215 216 return B_ERROR; 217 } 218