1 /* 2 Copyright (c) 2002, Thomas Kurschel 3 4 5 Part of Radeon accelerant 6 7 Command Processor handling 8 */ 9 10 #include "radeon_accelerant.h" 11 #include "mmio.h" 12 #include "CPMicroCode.h" 13 #include "cp_regs.h" 14 #include "buscntrl_regs.h" 15 #include "utils.h" 16 #include <sys/ioctl.h> 17 18 #include "log_coll.h" 19 #include "log_enum.h" 20 21 #include <string.h> 22 23 uint getAvailRingBuffer( accelerator_info *ai ); 24 25 26 // non-local memory is used as following: 27 // - 0x10000 dwords for ring buffer 28 // - 8 dwords for returned data (i.e. current read ptr) 29 // - 6 dwords for "scratch registers" 30 // 31 // usage of scratch registers: 32 // - reg 0 = reached engine.count 33 // 34 // the ring buffer stuff must be at a constant offset as 35 // clones cannot be informed if it were changed 36 37 38 // upload Micro-Code of CP 39 static void loadMicroEngineRAMData( accelerator_info *ai ) 40 { 41 int i; 42 const uint32 (*microcode)[2]; 43 44 SHOW_FLOW0( 3, "" ); 45 46 switch( ai->si->asic ) { 47 case rt_r300: 48 case rt_r300_4p: 49 case rt_rv350: 50 case rt_rv360: 51 case rt_r350: 52 case rt_r360: 53 microcode = r300_cp_microcode; 54 break; 55 case rt_r200: 56 //case rt_rv250: 57 //case rt_m9: 58 microcode = r200_cp_microcode; 59 break; 60 default: 61 microcode = radeon_cp_microcode; 62 } 63 64 Radeon_WaitForIdle( ai ); 65 66 OUTREG( ai->regs, RADEON_CP_ME_RAM_ADDR, 0 ); 67 68 for ( i = 0 ; i < 256 ; i++ ) { 69 OUTREG( ai->regs, RADEON_CP_ME_RAM_DATAH, microcode[i][1] ); 70 OUTREG( ai->regs, RADEON_CP_ME_RAM_DATAL, microcode[i][0] ); 71 } 72 } 73 74 // convert CPU's to graphics card's virtual address 75 #define CPU2GC( addr ) (((uint32)(addr) - (uint32)si->nonlocal_mem) + si->nonlocal_vm_start) 76 77 // initialize bus mastering 78 static status_t setupCPRegisters( accelerator_info *ai, int aring_size ) 79 { 80 vuint8 *regs = ai->regs; 81 shared_info *si = ai->si; 82 uint32 tmp; 83 84 #if 0 85 { 86 // allocate ring buffer etc. from local memory instead of PCI memory 87 radeon_alloc_local_mem am; 88 89 am.magic = RADEON_PRIVATE_DATA_MAGIC; 90 am.size = (aring_size + 14) * 4; 91 92 if( ioctl( ai->fd, RADEON_ALLOC_LOCAL_MEM, &am ) != B_OK ) 93 SHOW_ERROR0( 0, "Cannot allocate ring buffer from local memory" ); 94 else { 95 si->nonlocal_vm_start = am.fb_offset; 96 si->nonlocal_mem = (uint32 *)(si->framebuffer + am.fb_offset); 97 } 98 } 99 #endif 100 101 memset( &si->ring, 0, sizeof( si->ring )); 102 103 // set write pointer delay to zero; 104 // we assume that memory synchronization is done correctly my MoBo 105 // and Radeon_SendCP contains a hack that hopefully fixes such problems 106 OUTREG( regs, RADEON_CP_RB_WPTR_DELAY, 0 ); 107 108 // setup CP buffer 109 si->ring.start = si->nonlocal_mem; 110 si->ring.size = aring_size; 111 OUTREG( regs, RADEON_CP_RB_BASE, CPU2GC( si->ring.start )); 112 SHOW_INFO( 3, "CP buffer address=%lx", CPU2GC( si->ring.start )); 113 114 // setup CP read pointer buffer 115 si->ring.head = si->ring.start + si->ring.size; 116 OUTREG( regs, RADEON_CP_RB_RPTR_ADDR, CPU2GC( si->ring.head )); 117 SHOW_INFO( 3, "CP read pointer buffer==%lx", CPU2GC( si->ring.head )); 118 119 // set ring buffer size 120 // (it's log2 of qwords) 121 OUTREG( regs, RADEON_CP_RB_CNTL, log2( si->ring.size / 2 )); 122 SHOW_INFO( 3, "CP buffer size mask=%ld", log2( si->ring.size / 2 ) ); 123 124 // set CP buffer pointers 125 OUTREG( regs, RADEON_CP_RB_RPTR, 0 ); 126 OUTREG( regs, RADEON_CP_RB_WPTR, 0 ); 127 *si->ring.head = 0; 128 si->ring.tail = 0; 129 130 // setup scratch register buffer 131 si->scratch_ptr = si->ring.head + RADEON_SCRATCH_REG_OFFSET / sizeof( uint32 ); 132 OUTREG( regs, RADEON_SCRATCH_ADDR, CPU2GC( si->scratch_ptr )); 133 OUTREG( regs, RADEON_SCRATCH_UMSK, 0x3f ); 134 135 Radeon_WaitForIdle( ai ); 136 137 // enable bus mastering 138 #if 1 139 tmp = INREG( ai->regs, RADEON_BUS_CNTL ) & ~RADEON_BUS_MASTER_DIS; 140 OUTREG( regs, RADEON_BUS_CNTL, tmp ); 141 #endif 142 143 // sync units 144 OUTREG( regs, RADEON_ISYNC_CNTL, 145 (RADEON_ISYNC_ANY2D_IDLE3D | 146 RADEON_ISYNC_ANY3D_IDLE2D | 147 RADEON_ISYNC_WAIT_IDLEGUI | 148 RADEON_ISYNC_CPSCRATCH_IDLEGUI) ); 149 150 return B_OK; 151 } 152 153 154 // get number of free entries in CP's ring buffer 155 uint getAvailRingBuffer( accelerator_info *ai ) 156 { 157 shared_info *si = ai->si; 158 int space; 159 160 // space = *si->ring.head - si->ring.tail; 161 space = INREG( ai->regs, RADEON_CP_RB_RPTR ) - si->ring.tail; 162 163 if( space <= 0 ) 164 space += si->ring.size; 165 166 // don't fill up the entire buffer as we cannot 167 // distinguish between a full and an empty ring 168 --space; 169 170 SHOW_FLOW( 4, "head=%ld, tail=%ld, space=%ld", *si->ring.head, si->ring.tail, space ); 171 172 LOG1( si->log, _GetAvailRingBufferQueue, space ); 173 174 return space; 175 } 176 177 // initialize CP so it's ready for BM 178 status_t Radeon_InitCP( accelerator_info *ai ) 179 { 180 // shared_info *si = ai->si; 181 status_t result; 182 183 SHOW_FLOW0( 3, "" ); 184 185 // init raw CP 186 loadMicroEngineRAMData( ai ); 187 188 // do soft-reset 189 Radeon_ResetEngine( ai ); 190 191 // after warm-reset, the CP may still be active and thus react to 192 // register writes during initialization unpredictably, so we better 193 // stop it first 194 OUTREG( ai->regs, RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIDIS_INDDIS ); 195 INREG( ai->regs, RADEON_CP_CSQ_CNTL ); 196 197 // reset CP to make disabling active 198 Radeon_ResetEngine( ai ); 199 200 // setup CP memory ranges 201 result = setupCPRegisters( ai, 0x10000 ); 202 if( result < 0 ) 203 return result; 204 205 // tell CP to use BM 206 Radeon_WaitForIdle( ai ); 207 OUTREG( ai->regs, RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIBM_INDBM ); 208 209 // this may be a bit too much 210 Radeon_SendPurgeCache( ai ); 211 Radeon_SendWaitUntilIdle( ai ); 212 213 return B_OK; 214 } 215 216 217 // write to register via CP 218 void Radeon_WriteRegCP( accelerator_info *ai, uint32 reg, uint32 value ) 219 { 220 uint32 buffer[2]; 221 222 SHOW_FLOW0( 4, "" ); 223 224 LOG2( ai->si->log, _Radeon_WriteRegFifo, reg, value ); 225 226 buffer[0] = CP_PACKET0( reg, 0 ); 227 buffer[1] = value; 228 229 Radeon_SendCP( ai, buffer, 2 ); 230 } 231 232 233 // send packets to CP 234 void Radeon_SendCP( accelerator_info *ai, uint32 *buffer, uint32 num_dwords ) 235 { 236 shared_info *si = ai->si; 237 238 SHOW_FLOW( 4, "num_dwords=%d", num_dwords ); 239 240 while( num_dwords > 0 ) { 241 uint32 space; 242 uint32 max_copy; 243 // uint i; 244 245 space = getAvailRingBuffer( ai ); 246 247 if( space == 0 ) 248 continue; 249 250 max_copy = min( space, num_dwords ); 251 252 #ifdef ENABLE_LOGGING 253 for( i = 0; i < max_copy; ++i ) 254 LOG1( si->log, _Radeon_SendCP, buffer[i] ); 255 #endif 256 257 if( si->ring.tail + max_copy >= si->ring.size ) { 258 uint32 sub_len; 259 260 sub_len = si->ring.size - si->ring.tail; 261 memcpy( si->ring.start + si->ring.tail, buffer, sub_len * sizeof( uint32 )); 262 buffer += sub_len; 263 num_dwords -= sub_len; 264 max_copy -= sub_len; 265 si->ring.tail = 0; 266 } 267 268 memcpy( si->ring.start + si->ring.tail, buffer, max_copy * sizeof( uint32 ) ); 269 buffer += max_copy; 270 num_dwords -= max_copy; 271 if( si->ring.tail + max_copy < si->ring.size ) 272 si->ring.tail += max_copy; 273 else 274 si->ring.tail = 0; 275 } 276 277 // some chipsets have problems with write buffers; effectively, the command 278 // list we've just created gets delayed in some queue and the graphics chip 279 // reads out-dated commands, which don't make sense and thus crash the 280 // graphics card 281 282 // flush writes to ring 283 // (this code is a bit of a overkill - currently, only some WinChip/Cyrix 284 // CPU's support out-of-order writes, but we are prepared) 285 __asm__ __volatile__ ("lock; addl $0,0(%%esp)": : :"memory"); 286 // make sure the chipset has flushed its write buffer by 287 // reading some uncached memory 288 (void)*si->ring.head; 289 290 // now, the command list should really be written to memory, 291 // so it's safe to instruct the graphics card to read it 292 OUTREG( ai->regs, RADEON_CP_RB_WPTR, si->ring.tail ); 293 294 // read from PCI bus to ensure correct posting 295 INREG( ai->regs, RADEON_CP_RB_RPTR ); 296 } 297