/*
	Copyright (c) 2002, Thomas Kurschel


	Part of Radeon kernel driver

	PCI GART.

	Currently, we use PCI DMA. Changing to AGP would only affect this
	file, but AGP GART is specific to the motherboard's chipset, and as
	PCI DMA is already overkill for 2D, I can't be bothered to write a
	dozen AGP drivers just to gain a little extra speed.
*/


#include "radeon_driver.h"
#include <malloc.h>
#include <image.h>
#include "mmio.h"
#include "buscntrl_regs.h"
#include "memcntrl_regs.h"
#include "cp_regs.h"
#include <string.h>


#if 1

// create actual GART buffer
static status_t createGARTBuffer( GART_info *gart, size_t size )
{
	SHOW_FLOW0( 3, "" );

	// round the buffer size up to a whole number of pages
	gart->buffer.size = size = (size + B_PAGE_SIZE - 1) & ~(B_PAGE_SIZE - 1);

	// if this buffer is used for PCI BM, cache snooping takes care of
	// syncing memory accesses; if it is used for AGP, we'd have to access
	// it via the AGP aperture (and mark the aperture as write-combined),
	// as cache consistency isn't guaranteed there

	// the specs say that some chipsets do a kind of lazy flushing, so the
	// graphics card may read obsolete data; up to now we use PCI only,
	// where this shouldn't happen by design; if we change to AGP, we may
	// have to tweak the pre-charge time of the write buffer pointer

	// as some variables in the accelerant point directly into
	// the DMA buffer, we have to grant access to all apps
	gart->buffer.area = create_area( "Radeon PCI GART buffer",
		&gart->buffer.ptr, B_ANY_KERNEL_ADDRESS,
		size, B_FULL_LOCK,
		B_READ_AREA | B_WRITE_AREA | B_USER_CLONEABLE_AREA );
	if( gart->buffer.area < 0 ) {
		SHOW_ERROR( 1, "cannot create PCI GART buffer (%s)",
			strerror( gart->buffer.area ));
		return gart->buffer.area;
	}

	gart->buffer.unaligned_area = -1;

	memset( gart->buffer.ptr, 0, size );

	return B_OK;
}

#else

static status_t createGARTBuffer( GART_info *gart, size_t size )
{
	physical_entry map[1];
	void *unaligned_addr, *aligned_phys;

	SHOW_FLOW0( 3, "" );

	// round the buffer size up to a whole number of pages
	gart->buffer.size = size = (size + B_PAGE_SIZE - 1) & ~(B_PAGE_SIZE - 1);

	// we allocate a contiguous area of twice the requested size so we can
	// find an aligned, contiguous range within it; the graphics card
	// doesn't care, but the CPU cannot mark an arbitrary range as
	// write-combined, at least older CPUs can't
	// question: is this necessary for a PCI GART because of bus snooping?
	gart->buffer.unaligned_area = create_area( "Radeon PCI GART buffer",
		&unaligned_addr, B_ANY_KERNEL_ADDRESS,
		2 * size, B_CONTIGUOUS/*B_FULL_LOCK*/,
		B_READ_AREA | B_WRITE_AREA | B_USER_CLONEABLE_AREA );
	if( gart->buffer.unaligned_area < 0 ) {
		SHOW_ERROR( 1, "cannot create PCI GART buffer (%s)",
			strerror( gart->buffer.unaligned_area ));
		return gart->buffer.unaligned_area;
	}

	get_memory_map( unaligned_addr, B_PAGE_SIZE, map, 1 );

	// align the physical start address to the buffer size
	// (this only works if size is a power of two)
	aligned_phys =
		(void *)(((uint32)map[0].address + size - 1) & ~(size - 1));

	SHOW_FLOW( 3, "aligned_phys=%p", aligned_phys );

	// map the aligned range write-combined if possible
	gart->buffer.area = map_physical_memory( "Radeon aligned PCI GART buffer",
		aligned_phys,
		size, B_ANY_KERNEL_BLOCK_ADDRESS | B_MTR_WC,
		B_READ_AREA | B_WRITE_AREA, &gart->buffer.ptr );

	if( gart->buffer.area < 0 ) {
		// fall back to an uncached mapping
		SHOW_ERROR0( 3, "cannot map buffer with WC" );
		gart->buffer.area = map_physical_memory( "Radeon aligned PCI GART buffer",
			aligned_phys,
			size, B_ANY_KERNEL_BLOCK_ADDRESS,
			B_READ_AREA | B_WRITE_AREA, &gart->buffer.ptr );
	}

	if( gart->buffer.area < 0 ) {
		SHOW_ERROR0( 1, "cannot map GART buffer" );
		delete_area( gart->buffer.unaligned_area );
		gart->buffer.unaligned_area = -1;
		return gart->buffer.area;
	}

	memset( gart->buffer.ptr, 0, size );

	return B_OK;
}

#endif


// init GATT (could be used for both PCI and AGP)
static status_t initGATT( GART_info *gart )
{
	area_id map_area;
	uint32 map_area_size;
	physical_entry *map;
	physical_entry PTB_map[1];
	size_t map_count;
	uint32 i;
	uint32 *gatt_entry;
	size_t num_pages;

	SHOW_FLOW0( 3, "" );

	// one GATT entry per page of the GART buffer
	num_pages = (gart->buffer.size + B_PAGE_SIZE - 1) / B_PAGE_SIZE;

	// the GATT must be physically contiguous
	gart->GATT.area = create_area( "Radeon GATT", (void **)&gart->GATT.ptr,
		B_ANY_KERNEL_ADDRESS,
		(num_pages * sizeof( uint32 ) + B_PAGE_SIZE - 1) & ~(B_PAGE_SIZE - 1),
		B_CONTIGUOUS, B_READ_AREA | B_WRITE_AREA | B_USER_CLONEABLE_AREA );

	if( gart->GATT.area < 0 ) {
		SHOW_ERROR( 1, "cannot create GATT table (%s)",
			strerror( gart->GATT.area ));
		return gart->GATT.area;
	}

	get_memory_map( gart->GATT.ptr, B_PAGE_SIZE, PTB_map, 1 );
	gart->GATT.phys = (uint32)PTB_map[0].address;

	SHOW_INFO( 3, "GATT_ptr=%p, GATT_phys=%p", gart->GATT.ptr,
		(void *)gart->GATT.phys );

	// get the address mapping of the GART buffer
	memset( gart->GATT.ptr, 0, num_pages * sizeof( uint32 ));

	map_count = num_pages + 1;

	// align the map size to B_PAGE_SIZE
	map_area_size = map_count * sizeof( physical_entry );
	if( (map_area_size / B_PAGE_SIZE) * B_PAGE_SIZE != map_area_size )
		map_area_size = ((map_area_size / B_PAGE_SIZE) + 1) * B_PAGE_SIZE;

	// temporary area where we fill in the memory map (deleted below)
	map_area = create_area( "pci_gart_map_area", (void **)&map, B_ANY_ADDRESS,
		map_area_size, B_FULL_LOCK, B_READ_AREA | B_WRITE_AREA );
	dprintf( "pci_gart_map_area: %ld\n", map_area );

	get_memory_map( gart->buffer.ptr, gart->buffer.size, map, map_count );

	// the following looks a bit strange because the kernel
	// combines successive pages into one physical_entry
	gatt_entry = gart->GATT.ptr;

	for( i = 0; i < map_count; ++i ) {
		uint32 addr = (uint32)map[i].address;
		size_t size = map[i].size;

		if( size == 0 )
			break;

		// write one GATT entry per page covered by this physical_entry
		while( size > 0 ) {
			*gatt_entry++ = addr;
			//SHOW_FLOW( 3, "%lx", *(gatt_entry-1) );
			addr += ATI_PCIGART_PAGE_SIZE;
			size -= ATI_PCIGART_PAGE_SIZE;
		}
	}

	delete_area( map_area );

	if( i == map_count ) {
		// this case should never happen
		SHOW_ERROR0( 0, "memory map of GART buffer too large!" );
		delete_area( gart->GATT.area );
		gart->GATT.area = -1;
		return B_ERROR;
	}

	// this might be a bit more than needed, as
	// 1. Intel CPUs have "processor order", i.e. writes appear to external
	//    devices in program order, so a simple final write should be sufficient
	// 2. if it is a PCI GART, bus snooping should provide cache coherence
	// 3. this function is a no-op :(
	clear_caches( gart->GATT.ptr, num_pages * sizeof( uint32 ),
		B_FLUSH_DCACHE );

	// back to real life - some chipsets have write buffers that
	// prove all previous assumptions wrong
	// (don't know whether this really helps, though)
	asm volatile ( "wbinvd" ::: "memory" );

	return B_OK;
}


// destroy GART buffer
static void destroyGARTBuffer( GART_info *gart )
{
	if( gart->buffer.area > 0 )
		delete_area( gart->buffer.area );

	if( gart->buffer.unaligned_area > 0 )
		delete_area( gart->buffer.unaligned_area );

	gart->buffer.area = gart->buffer.unaligned_area = -1;
}


// destroy GATT
static void destroyGATT( GART_info *gart )
{
	if( gart->GATT.area > 0 )
		delete_area( gart->GATT.area );

	gart->GATT.area = -1;
}


// init PCI GART
status_t Radeon_InitPCIGART( device_info *di )
{
	status_t result;

	result = createGARTBuffer( &di->pci_gart, PCI_GART_SIZE );
	if( result < 0 )
		goto err1;

	result = initGATT( &di->pci_gart );
	if( result < 0 )
		goto err2;

	return B_OK;

err2:
	destroyGARTBuffer( &di->pci_gart );

err1:
	return result;
}


// cleanup PCI GART
void Radeon_CleanupPCIGART( device_info *di )
{
	vuint8 *regs = di->regs;

	SHOW_FLOW0( 3, "" );

	// perhaps we should wait for FIFO space before touching these registers, but
	// 1. I don't want to add all the sync stuff to the kernel driver
	// 2. I doubt that these regs are buffered by the FIFO
	// still, in the worst case the CP has written some commands to the register
	// FIFO, which can do any kind of nasty things

	// disable CP BM
	OUTREG( regs, RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIDIS_INDDIS );
	// read back for flushing
	INREG( regs, RADEON_CP_CSQ_CNTL );

	// disable bus mastering
	OUTREGP( regs, RADEON_BUS_CNTL, RADEON_BUS_MASTER_DIS, ~RADEON_BUS_MASTER_DIS );
	// disable PCI GART translation
	OUTREGP( regs, RADEON_AIC_CNTL, 0, ~RADEON_PCIGART_TRANSLATE_EN );

	destroyGATT( &di->pci_gart );
	destroyGARTBuffer( &di->pci_gart );
}
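
/*
	Illustrative sketch (not part of this driver): initGATT() only builds the
	page table and remembers its physical base in gart->GATT.phys; the chip
	itself is programmed elsewhere. On these cards the PCI GART is typically
	hooked up by writing the table base and the bus-address window to the AIC
	registers and then setting RADEON_PCIGART_TRANSLATE_EN, mirroring the
	disable sequence in Radeon_CleanupPCIGART() above. The register names
	RADEON_AIC_PT_BASE, RADEON_AIC_LO_ADDR and RADEON_AIC_HI_ADDR and the
	window start `aper_start` are assumptions taken from public Radeon
	register documentation, not from this file.
*/
#if 0
static void sketch_EnablePCIGART( device_info *di, uint32 aper_start )
{
	vuint8 *regs = di->regs;

	// physical base of the GATT built by initGATT()
	OUTREG( regs, RADEON_AIC_PT_BASE, di->pci_gart.GATT.phys );
	// bus-address window that gets translated through the GATT
	OUTREG( regs, RADEON_AIC_LO_ADDR, aper_start );
	OUTREG( regs, RADEON_AIC_HI_ADDR, aper_start + PCI_GART_SIZE - 1 );
	// enable translation (Radeon_CleanupPCIGART() disables it again)
	OUTREGP( regs, RADEON_AIC_CNTL,
		RADEON_PCIGART_TRANSLATE_EN, ~RADEON_PCIGART_TRANSLATE_EN );
}
#endif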