/*
	Copyright (c) 2002, Thomas Kurschel

	Part of Radeon kernel driver

	PCI GART.

	Currently, we use PCI DMA. Changing to AGP would only affect this
	file, but AGP GART support is specific to the motherboard's chipset,
	and as DMA is already overkill for 2D, I can't be bothered to write
	a dozen AGP drivers just to gain a little extra speed.
*/


#include "radeon_driver.h"
#include "mmio.h"
#include "buscntrl_regs.h"
#include "memcntrl_regs.h"
#include "cp_regs.h"

#include <image.h>

#include <stdlib.h>
#include <string.h>


#if 1
//! create actual GART buffer
static status_t
createGARTBuffer(GART_info *gart, size_t size)
{
	SHOW_FLOW0( 3, "" );

	gart->buffer.size = size = (size + B_PAGE_SIZE - 1) & ~(B_PAGE_SIZE - 1);

	// if this buffer is used for PCI bus mastering, cache snooping
	// takes care of syncing memory accesses; if it were used for AGP,
	// we'd have to access it via the AGP aperture (and mark the aperture
	// as write-combined), as cache consistency is not guaranteed there

	// the specs say that some chipsets do a kind of lazy flushing, so
	// the graphics card may read obsolete data; up to now we use PCI
	// only, where this shouldn't happen by design; if we change to AGP
	// we may have to tweak the pre-charge time of the write buffer pointer

	// as some variables in the accelerant point directly into
	// the DMA buffer, we have to grant access to all apps
	gart->buffer.area = create_area("Radeon PCI GART buffer",
		&gart->buffer.ptr, B_ANY_KERNEL_ADDRESS,
		size, B_FULL_LOCK,
#ifdef HAIKU_TARGET_PLATFORM_HAIKU
		// TODO: really user read/write?
		B_READ_AREA | B_WRITE_AREA | B_USER_CLONEABLE_AREA
#else
		0
#endif
		);
	if (gart->buffer.area < 0) {
		SHOW_ERROR(1, "cannot create PCI GART buffer (%s)",
			strerror(gart->buffer.area));
		return gart->buffer.area;
	}

	gart->buffer.unaligned_area = -1;

	memset( gart->buffer.ptr, 0, size );

	return B_OK;
}

#else

static status_t createGARTBuffer( GART_info *gart, size_t size )
{
	physical_entry map[1];
	void *unaligned_addr, *aligned_phys;

	SHOW_FLOW0( 3, "" );

	gart->buffer.size = size = (size + B_PAGE_SIZE - 1) & ~(B_PAGE_SIZE - 1);

	// we allocate a contiguous area of twice the size to be able to find
	// an aligned, contiguous range within it; the graphics card doesn't
	// care, but the CPU cannot mark an arbitrarily aligned range as
	// write-combined, at least older ones can't
	// question: is this necessary for a PCI GART because of bus snooping?
	gart->buffer.unaligned_area = create_area( "Radeon PCI GART buffer",
		&unaligned_addr, B_ANY_KERNEL_ADDRESS,
		2 * size, B_CONTIGUOUS/*B_FULL_LOCK*/, B_READ_AREA | B_WRITE_AREA | B_USER_CLONEABLE_AREA );
		// TODO: Physical aligning can be done without waste using the
		// private create_area_etc().
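	// Illustration (hypothetical numbers): with size = 8 MB, the area above
	// is 16 MB long; if its physical start happens to be 0x01234000, the
	// round-up below yields aligned_phys = 0x01800000, and the aligned 8 MB
	// window [0x01800000, 0x02000000) is guaranteed to fit inside the
	// allocation, since rounding up skips at most size - 1 bytes.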
	if (gart->buffer.unaligned_area < 0) {
		SHOW_ERROR( 1, "cannot create PCI GART buffer (%s)",
			strerror( gart->buffer.unaligned_area ));
		return gart->buffer.unaligned_area;
	}

	get_memory_map( unaligned_addr, B_PAGE_SIZE, map, 1 );

	aligned_phys =
		(void *)((map[0].address + size - 1) & ~(size - 1));

	SHOW_FLOW( 3, "aligned_phys=%p", aligned_phys );

	gart->buffer.area = map_physical_memory( "Radeon aligned PCI GART buffer",
		(addr_t)aligned_phys,
		size, B_ANY_KERNEL_BLOCK_ADDRESS | B_MTR_WC,
		B_READ_AREA | B_WRITE_AREA, &gart->buffer.ptr );

	if( gart->buffer.area < 0 ) {
		SHOW_ERROR0( 3, "cannot map buffer with WC" );
		gart->buffer.area = map_physical_memory( "Radeon aligned PCI GART buffer",
			(addr_t)aligned_phys,
			size, B_ANY_KERNEL_BLOCK_ADDRESS,
			B_READ_AREA | B_WRITE_AREA, &gart->buffer.ptr );
	}

	if( gart->buffer.area < 0 ) {
		SHOW_ERROR0( 1, "cannot map GART buffer" );
		delete_area( gart->buffer.unaligned_area );
		gart->buffer.unaligned_area = -1;
		return gart->buffer.area;
	}

	memset( gart->buffer.ptr, 0, size );

	return B_OK;
}

#endif

// init GATT (could be used for both PCI and AGP)
static status_t initGATT( GART_info *gart )
{
	area_id map_area;
	uint32 map_area_size;
	physical_entry *map;
	physical_entry PTB_map[1];
	size_t map_count;
	uint32 i;
	uint32 *gatt_entry;
	size_t num_pages;

	SHOW_FLOW0( 3, "" );

	// number of B_PAGE_SIZE pages covered by the DMA buffer
	num_pages = (gart->buffer.size + B_PAGE_SIZE - 1) / B_PAGE_SIZE;

	// the GATT (page table) must be physically contiguous
	gart->GATT.area = create_area("Radeon GATT", (void **)&gart->GATT.ptr,
		B_ANY_KERNEL_ADDRESS,
		(num_pages * sizeof( uint32 ) + B_PAGE_SIZE - 1) & ~(B_PAGE_SIZE - 1),
		B_32_BIT_CONTIGUOUS,
		// TODO: Physical address is cast to 32 bit below! Use B_CONTIGUOUS,
		// when that is (/can be) fixed!
#ifdef HAIKU_TARGET_PLATFORM_HAIKU
		// TODO: really user read/write?
		B_READ_AREA | B_WRITE_AREA | B_USER_CLONEABLE_AREA
#else
		0
#endif
		);

	if (gart->GATT.area < 0) {
		SHOW_ERROR(1, "cannot create GATT table (%s)",
			strerror(gart->GATT.area));
		return gart->GATT.area;
	}

	get_memory_map(gart->GATT.ptr, B_PAGE_SIZE, PTB_map, 1);
	gart->GATT.phys = PTB_map[0].address;

	SHOW_INFO(3, "GATT_ptr=%p, GATT_phys=%p", gart->GATT.ptr,
		(void *)gart->GATT.phys);

	// clear the GATT, then fill it with the address mapping of the DMA buffer
	memset(gart->GATT.ptr, 0, num_pages * sizeof(uint32));

	// one extra entry so the end of the memory map can be detected (size == 0)
	map_count = num_pages + 1;

	// align size to B_PAGE_SIZE
	map_area_size = map_count * sizeof(physical_entry);
	if ((map_area_size / B_PAGE_SIZE) * B_PAGE_SIZE != map_area_size)
		map_area_size = ((map_area_size / B_PAGE_SIZE) + 1) * B_PAGE_SIZE;

	// temporary area where we fill in the memory map (deleted below)
	map_area = create_area("pci_gart_map_area", (void **)&map, B_ANY_ADDRESS,
		map_area_size, B_FULL_LOCK, B_READ_AREA | B_WRITE_AREA);
		// TODO: We actually have a working malloc() in the kernel. Why create
		// an area?
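	// bail out if the temporary map area could not be created; release the
	// GATT table again, as the caller does not clean it up on failure
	if (map_area < 0) {
		SHOW_ERROR(1, "cannot create memory map area (%s)", strerror(map_area));
		delete_area(gart->GATT.area);
		gart->GATT.area = -1;
		return map_area;
	}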
	dprintf("pci_gart_map_area: %ld\n", map_area);

	get_memory_map( gart->buffer.ptr, gart->buffer.size, map, map_count );

	// the following looks a bit strange because the kernel combines
	// physically contiguous pages into a single memory map entry, so each
	// entry has to be expanded into one GATT entry per page again
	gatt_entry = gart->GATT.ptr;

	for( i = 0; i < map_count; ++i ) {
		phys_addr_t addr = map[i].address;
		size_t size = map[i].size;

		if( size == 0 )
			break;

		while( size > 0 ) {
			*gatt_entry++ = addr;
			//SHOW_FLOW( 3, "%lx", *(gatt_entry-1) );
			addr += ATI_PCIGART_PAGE_SIZE;
			size -= ATI_PCIGART_PAGE_SIZE;
		}
	}

	delete_area(map_area);

	if( i == map_count ) {
		// this case should never happen
		SHOW_ERROR0( 0, "memory map of GART buffer too large!" );
		delete_area( gart->GATT.area );
		gart->GATT.area = -1;
		return B_ERROR;
	}

	// this might be a bit more than needed, as
	// 1. Intel CPUs have "processor order", i.e. writes appear to external
	//    devices in program order, so a simple final write should be sufficient
	// 2. if it is a PCI GART, bus snooping should provide cache coherence
	// 3. this function is a no-op :(
	clear_caches( gart->GATT.ptr, num_pages * sizeof( uint32 ),
		B_FLUSH_DCACHE );

	// back to real life - some chipsets have write buffers that
	// prove all previous assumptions wrong
	// (don't know whether this really helps though)
#if defined(__INTEL__)
	asm volatile ( "wbinvd" ::: "memory" );
#elif defined(__POWERPC__)
	// TODO: use dcbst/sync on PowerPC to flush the data cache
	// (icbi would only affect the instruction cache)?
#endif
	return B_OK;
}


// destroy GART buffer
static void destroyGARTBuffer( GART_info *gart )
{
	if( gart->buffer.area > 0 )
		delete_area( gart->buffer.area );

	if( gart->buffer.unaligned_area > 0 )
		delete_area( gart->buffer.unaligned_area );

	gart->buffer.area = gart->buffer.unaligned_area = -1;
}


// destroy GATT
static void destroyGATT( GART_info *gart )
{
	if( gart->GATT.area > 0 )
		delete_area( gart->GATT.area );

	gart->GATT.area = -1;
}


// init PCI GART
status_t Radeon_InitPCIGART( device_info *di )
{
	status_t result;

	result = createGARTBuffer( &di->pci_gart, PCI_GART_SIZE );
	if( result < 0 )
		goto err1;

	result = initGATT( &di->pci_gart );
	if( result < 0 )
		goto err2;

	return B_OK;

err2:
	destroyGARTBuffer( &di->pci_gart );

err1:
	return result;
}


// cleanup PCI GART
void Radeon_CleanupPCIGART( device_info *di )
{
	vuint8 *regs = di->regs;

	SHOW_FLOW0( 3, "" );

	// perhaps we should wait for FIFO space before messing with the
	// registers, but:
	// 1. I don't want to add all the sync stuff to the kernel driver
	// 2. I doubt that these registers are buffered by the FIFO
	// still, in the worst case the CP has already written some commands
	// to the register FIFO, which could do all kinds of nasty things

	// disable CP bus mastering
	OUTREG( regs, RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIDIS_INDDIS );
	// read-back for flushing
	INREG( regs, RADEON_CP_CSQ_CNTL );

	// disable bus mastering
	OUTREGP( regs, RADEON_BUS_CNTL, RADEON_BUS_MASTER_DIS, ~RADEON_BUS_MASTER_DIS );
	// disable PCI GART
	OUTREGP( regs, RADEON_AIC_CNTL, 0, ~RADEON_PCIGART_TRANSLATE_EN );

	destroyGATT( &di->pci_gart );
	destroyGARTBuffer( &di->pci_gart );
}
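
/*
	Usage sketch (illustrative only; the real call sites, variable names and
	register programming live elsewhere in the driver): Radeon_InitPCIGART()
	is meant to run once per device during setup, after which
	di->pci_gart.GATT.phys can presumably be programmed into the card's AIC
	registers and translation enabled via RADEON_PCIGART_TRANSLATE_EN;
	Radeon_CleanupPCIGART() reverts this and releases the buffers on teardown:

		if (Radeon_InitPCIGART(di) == B_OK) {
			// ... program AIC registers, start the CP, run 2D acceleration ...
			Radeon_CleanupPCIGART(di);
		}
*/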