1 /*
2 Copyright (c) 2002, Thomas Kurschel
3
4 Part of Radeon kernel driver
5
6 PCI GART.
7
Currently, we use PCI DMA. Changing to AGP would
only affect this file, but AGP-GART is specific to
the motherboard's chipset, and as DMA is already
overkill for 2D, it is not worth writing a dozen
AGP drivers just to gain a little extra speed-up.
13 */
14
15
16 #include "radeon_driver.h"
17 #include "mmio.h"
18 #include "buscntrl_regs.h"
19 #include "memcntrl_regs.h"
20 #include "cp_regs.h"
21
22 #include <image.h>
23
24 #include <stdlib.h>
25 #include <string.h>
26
27
28 #if 1
29 //! create actual GART buffer
30 static status_t
createGARTBuffer(GART_info * gart,size_t size)31 createGARTBuffer(GART_info *gart, size_t size)
32 {
33 SHOW_FLOW0( 3, "" );
34
35 gart->buffer.size = size = (size + B_PAGE_SIZE - 1) & ~(B_PAGE_SIZE - 1);
36
37 // if this buffer is used for PCI BM, cache snooping
38 // takes care of syncing memory accesses; if used for AGP,
39 // we'll have to access via AGP aperture (and mark aperture
40 // as write-combined) as cache consistency doesn't need to
41 // be guaranteed
42
43 // the specs say that some chipsets do kind of lazy flushing
44 // so the graphics card may read obsolete data; up to now
45 // we use PCI only where this shouldn't happen by design;
46 // if we change to AGP we may tweak the pre-charge time of
47 // the write buffer pointer
48
49 // as some variables in accelerant point directly into
50 // the DMA buffer, we have to grant access for all apps
51 gart->buffer.area = create_area("Radeon PCI GART buffer",
52 &gart->buffer.ptr, B_ANY_KERNEL_ADDRESS,
53 size, B_FULL_LOCK,
54 // TODO: really user read/write?
55 B_READ_AREA | B_WRITE_AREA | B_CLONEABLE_AREA);
56 if (gart->buffer.area < 0) {
57 SHOW_ERROR(1, "cannot create PCI GART buffer (%s)",
58 strerror(gart->buffer.area));
59 return gart->buffer.area;
60 }
61
62 gart->buffer.unaligned_area = -1;
63
64 memset( gart->buffer.ptr, 0, size );
65
66 return B_OK;
67 }
68
69 #else
70
createGARTBuffer(GART_info * gart,size_t size)71 static status_t createGARTBuffer( GART_info *gart, size_t size )
72 {
73 physical_entry map[1];
74 void *unaligned_addr, *aligned_phys;
75
76 SHOW_FLOW0( 3, "" );
77
78 gart->buffer.size = size = (size + B_PAGE_SIZE - 1) & ~(B_PAGE_SIZE - 1);
79
80 // we allocate an contiguous area having twice the size
81 // to be able to find an aligned, contiguous range within it;
82 // the graphics card doesn't care, but the CPU cannot
83 // make an arbitrary area WC'ed, at least elder ones
84 // question: is this necessary for a PCI GART because of bus snooping?
85 gart->buffer.unaligned_area = create_area( "Radeon PCI GART buffer",
86 &unaligned_addr, B_ANY_KERNEL_ADDRESS,
87 2 * size, B_CONTIGUOUS/*B_FULL_LOCK*/, B_READ_AREA | B_WRITE_AREA | B_CLONEABLE_AREA );
88 // TODO: Physical aligning can be done without waste using the
89 // private create_area_etc().
90 if (gart->buffer.unaligned_area < 0) {
91 SHOW_ERROR( 1, "cannot create PCI GART buffer (%s)",
92 strerror( gart->buffer.unaligned_area ));
93 return gart->buffer.unaligned_area;
94 }
95
96 get_memory_map( unaligned_addr, B_PAGE_SIZE, map, 1 );
97
98 aligned_phys =
99 (void **)((map[0].address + size - 1) & ~(size - 1));
100
101 SHOW_FLOW( 3, "aligned_phys=%p", aligned_phys );
102
103 gart->buffer.area = map_physical_memory( "Radeon aligned PCI GART buffer",
104 (addr_t)aligned_phys,
105 size, B_ANY_KERNEL_BLOCK_ADDRESS | B_WRITE_COMBINING_MEMORY,
106 B_READ_AREA | B_WRITE_AREA, &gart->buffer.ptr );
107
108 if( gart->buffer.area < 0 ) {
109 SHOW_ERROR0( 3, "cannot map buffer with WC" );
110 gart->buffer.area = map_physical_memory( "Radeon aligned PCI GART buffer",
111 (addr_t)aligned_phys,
112 size, B_ANY_KERNEL_BLOCK_ADDRESS,
113 B_READ_AREA | B_WRITE_AREA, &gart->buffer.ptr );
114 }
115
116 if( gart->buffer.area < 0 ) {
117 SHOW_ERROR0( 1, "cannot map GART buffer" );
118 delete_area( gart->buffer.unaligned_area );
119 gart->buffer.unaligned_area = -1;
120 return gart->buffer.area;
121 }
122
123 memset( gart->buffer.ptr, 0, size );
124
125 return B_OK;
126 }
127
128 #endif
129
130 // init GATT (could be used for both PCI and AGP)
initGATT(GART_info * gart)131 static status_t initGATT( GART_info *gart )
132 {
133 area_id map_area;
134 uint32 map_area_size;
135 physical_entry *map;
136 physical_entry PTB_map[1];
137 size_t map_count;
138 uint32 i;
139 uint32 *gatt_entry;
140 size_t num_pages;
141
142 SHOW_FLOW0( 3, "" );
143
144 num_pages = (gart->buffer.size + B_PAGE_SIZE - 1) & ~(B_PAGE_SIZE - 1);
145
146 // GART must be contiguous
147 gart->GATT.area = create_area("Radeon GATT", (void **)&gart->GATT.ptr,
148 B_ANY_KERNEL_ADDRESS,
149 (num_pages * sizeof( uint32 ) + B_PAGE_SIZE - 1) & ~(B_PAGE_SIZE - 1),
150 B_32_BIT_CONTIGUOUS,
151 // TODO: Physical address is cast to 32 bit below! Use B_CONTIGUOUS,
152 // when that is (/can be) fixed!
153 // TODO: really user read/write?
154 B_READ_AREA | B_WRITE_AREA | B_CLONEABLE_AREA);
155
156 if (gart->GATT.area < 0) {
157 SHOW_ERROR(1, "cannot create GATT table (%s)",
158 strerror(gart->GATT.area));
159 return gart->GATT.area;
160 }
161
162 get_memory_map(gart->GATT.ptr, B_PAGE_SIZE, PTB_map, 1);
163 gart->GATT.phys = PTB_map[0].address;
164
165 SHOW_INFO(3, "GATT_ptr=%p, GATT_phys=%p", gart->GATT.ptr,
166 (void *)gart->GATT.phys);
167
168 // get address mapping
169 memset(gart->GATT.ptr, 0, num_pages * sizeof(uint32));
170
171 map_count = num_pages + 1;
172
173 // align size to B_PAGE_SIZE
174 map_area_size = map_count * sizeof(physical_entry);
175 if ((map_area_size / B_PAGE_SIZE) * B_PAGE_SIZE != map_area_size)
176 map_area_size = ((map_area_size / B_PAGE_SIZE) + 1) * B_PAGE_SIZE;
177
178 // temporary area where we fill in the memory map (deleted below)
179 map_area = create_area("pci_gart_map_area", (void **)&map, B_ANY_ADDRESS,
180 map_area_size, B_FULL_LOCK, B_READ_AREA | B_WRITE_AREA);
181 // TODO: We actually have a working malloc() in the kernel. Why create
182 // an area?
183 dprintf("pci_gart_map_area: %" B_PRId32 "\n", map_area);
184
185 get_memory_map( gart->buffer.ptr, gart->buffer.size, map, map_count );
186
187 // the following looks a bit strange as the kernel
188 // combines successive entries
189 gatt_entry = gart->GATT.ptr;
190
191 for( i = 0; i < map_count; ++i ) {
192 phys_addr_t addr = map[i].address;
193 size_t size = map[i].size;
194
195 if( size == 0 )
196 break;
197
198 while( size > 0 ) {
199 *gatt_entry++ = addr;
200 //SHOW_FLOW( 3, "%lx", *(gart_entry-1) );
201 addr += ATI_PCIGART_PAGE_SIZE;
202 size -= ATI_PCIGART_PAGE_SIZE;
203 }
204 }
205
206 delete_area(map_area);
207
208 if( i == map_count ) {
209 // this case should never happen
210 SHOW_ERROR0( 0, "memory map of GART buffer too large!" );
211 delete_area( gart->GATT.area );
212 gart->GATT.area = -1;
213 return B_ERROR;
214 }
215
216 // this might be a bit more than needed, as
217 // 1. Intel CPUs have "processor order", i.e. writes appear to external
218 // devices in program order, so a simple final write should be sufficient
219 // 2. if it is a PCI GART, bus snooping should provide cache coherence
220 // 3. this function is a no-op :(
221 clear_caches( gart->GATT.ptr, num_pages * sizeof( uint32 ),
222 B_FLUSH_DCACHE );
223
224 // back to real live - some chipsets have write buffers that
225 // proove all previous assumptions wrong
226 // (don't know whether this really helps though)
227 #if defined(__i386__)
228 asm volatile ( "wbinvd" ::: "memory" );
229 #elif defined(__POWERPC__)
230 // TODO : icbi on PowerPC to flush instruction cache?
231 #endif
232 return B_OK;
233 }
234
235 // destroy GART buffer
// Free the GART buffer area(s) created by createGARTBuffer() and mark
// both ids invalid (-1); safe to call if either area was never created.
static void destroyGARTBuffer( GART_info *gart )
{
	if( gart->buffer.area > 0 )
		delete_area( gart->buffer.area );

	// only used by the (disabled) AGP variant; -1 otherwise
	if( gart->buffer.unaligned_area > 0 )
		delete_area( gart->buffer.unaligned_area );

	gart->buffer.area = gart->buffer.unaligned_area = -1;
}
246
247
248 // destroy GATT
// Free the GATT table area created by initGATT() and mark its id
// invalid (-1); safe to call if the table was never created.
static void destroyGATT( GART_info *gart )
{
	if( gart->GATT.area > 0 )
		delete_area( gart->GATT.area );

	gart->GATT.area = -1;
}
256
257
258 // init PCI GART
Radeon_InitPCIGART(device_info * di)259 status_t Radeon_InitPCIGART( device_info *di )
260 {
261 status_t result;
262
263 result = createGARTBuffer( &di->pci_gart, PCI_GART_SIZE );
264 if( result < 0 )
265 goto err1;
266
267 result = initGATT( &di->pci_gart );
268 if( result < 0 )
269 goto err2;
270
271 return B_OK;
272
273 err2:
274 destroyGARTBuffer( &di->pci_gart );
275
276 err1:
277 return result;
278 }
279
280
281 // cleanup PCI GART
// Tear down the PCI GART: stop the command processor and bus mastering,
// disable GART translation in hardware, then free table and buffer.
void Radeon_CleanupPCIGART( device_info *di )
{
	vuint8 *regs = di->regs;

	SHOW_FLOW0( 3, "" );

	// perhaps we should wait for FIFO space before messing around with registers, but
	// 1. I don't want to add all the sync stuff to the kernel driver
	// 2. I doubt that these regs are buffered by FIFO
	// but still: in worst case CP has written some commands to register FIFO,
	// which can do any kind of nasty things

	// disable CP BM
	OUTREG( regs, RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIDIS_INDDIS );
	// read-back for flushing
	INREG( regs, RADEON_CP_CSQ_CNTL );

	// disable bus mastering
	OUTREGP( regs, RADEON_BUS_CNTL, RADEON_BUS_MASTER_DIS, ~RADEON_BUS_MASTER_DIS );
	// disable PCI GART
	OUTREGP( regs, RADEON_AIC_CNTL, 0, ~RADEON_PCIGART_TRANSLATE_EN );

	// hardware no longer references the table/buffer - safe to free
	destroyGATT( &di->pci_gart );
	destroyGARTBuffer( &di->pci_gart );
}
307