xref: /haiku/src/add-ons/kernel/drivers/graphics/radeon/PCI_GART.c (revision 93aeb8c3bc3f13cb1f282e3e749258a23790d947)
1 /*
2 	Copyright (c) 2002, Thomas Kurschel
3 
4 
5 	Part of Radeon kernel driver
6 
7 	PCI GART.
8 
9 	Currently, we use PCI DMA. Changing to AGP would
10 	only affect this file, but AGP-GART is specific to
11 	the chipset of the motherboard, and as DMA is really
12 	overkill for 2D, I can't be bothered to write a dozen
13 	AGP drivers just to gain a little extra speedup.
14 */
15 
16 
17 #include "radeon_driver.h"
18 #include <malloc.h>
19 #include <image.h>
20 #include "mmio.h"
21 #include "buscntrl_regs.h"
22 #include "memcntrl_regs.h"
23 #include "cp_regs.h"
24 #include <string.h>
25 
26 
27 #if 1
28 // create actual GART buffer
29 static status_t createGARTBuffer( GART_info *gart, size_t size )
30 {
31 	SHOW_FLOW0( 3, "" );
32 
33 	gart->buffer.size = size = (size + B_PAGE_SIZE - 1) & ~(B_PAGE_SIZE - 1);
34 
35 	// if this buffer is used for PCI BM, cache snooping
36 	// takes care of syncing memory accesses; if used for AGP,
37 	// we'll have to access via AGP aperture (and mark aperture
38 	// as write-combined) as cache consistency doesn't need to
39 	// be guaranteed
40 
41 	// the specs say that some chipsets do kind of lazy flushing
42 	// so the graphics card may read obsolete data; up to now
43 	// we use PCI only where this shouldn't happen by design;
44 	// if we change to AGP we may tweak the pre-charge time of
45 	// the write buffer pointer
46 
47 	// as some variables in accelerant point directly into
48 	// the DMA buffer, we have to grant access for all apps
49 	gart->buffer.area = create_area( "Radeon PCI GART buffer",
50 		&gart->buffer.ptr, B_ANY_KERNEL_ADDRESS,
51 		size, B_FULL_LOCK, B_READ_AREA | B_WRITE_AREA | B_USER_CLONEABLE_AREA );
52 	if( gart->buffer.area < 0 ) {
53 		SHOW_ERROR( 1, "cannot create PCI GART buffer (%s)",
54 			strerror( gart->buffer.area ));
55 		return gart->buffer.area;
56 	}
57 
58 	gart->buffer.unaligned_area = -1;
59 
60 	memset( gart->buffer.ptr, 0, size );
61 
62 	return B_OK;
63 }
64 
65 #else
66 
// alternative (currently disabled) GART buffer allocation:
// allocate a physically contiguous block twice the needed size, then map an
// aligned window of it a second time — preferably write-combined — so elder
// CPUs that cannot WC an arbitrary range still get a usable aperture
static status_t createGARTBuffer( GART_info *gart, size_t size )
{
	physical_entry map[1];
	void *unaligned_addr, *aligned_phys;

	SHOW_FLOW0( 3, "" );

	// round the size up to a whole number of pages
	gart->buffer.size = size = (size + B_PAGE_SIZE - 1) & ~(B_PAGE_SIZE - 1);

	// we allocate a contiguous area having twice the size
	// to be able to find an aligned, contiguous range within it;
	// the graphics card doesn't care, but the CPU cannot
	// make an arbitrary area WC'ed, at least elder ones
	// question: is this necessary for a PCI GART because of bus snooping?
	gart->buffer.unaligned_area = create_area( "Radeon PCI GART buffer",
		&unaligned_addr, B_ANY_KERNEL_ADDRESS,
		2 * size, B_CONTIGUOUS/*B_FULL_LOCK*/, B_READ_AREA | B_WRITE_AREA | B_USER_CLONEABLE_AREA );
	if( gart->buffer.unaligned_area < 0 ) {
		SHOW_ERROR( 1, "cannot create PCI GART buffer (%s)",
			strerror( gart->buffer.unaligned_area ));
		return gart->buffer.unaligned_area;
	}

	// NOTE(review): return value of get_memory_map() is unchecked; if it
	// fails, map[0].address below is garbage — verify before re-enabling
	get_memory_map( unaligned_addr, B_PAGE_SIZE, map, 1 );

	// round the physical start address up to the next multiple of "size";
	// the mask trick assumes size is a power of two — TODO confirm for
	// all callers before re-enabling this path
	aligned_phys =
		(void **)(((uint32)map[0].address + size - 1) & ~(size - 1));

	SHOW_FLOW( 3, "aligned_phys=%p", aligned_phys );

	// map the aligned physical range, requesting write-combining
	gart->buffer.area = map_physical_memory( "Radeon aligned PCI GART buffer",
		aligned_phys,
		size, B_ANY_KERNEL_BLOCK_ADDRESS | B_MTR_WC,
		B_READ_AREA | B_WRITE_AREA, &gart->buffer.ptr );

	if( gart->buffer.area < 0 ) {
		// WC mapping can fail (presumably no free MTRR — confirm);
		// fall back to an uncached mapping
		SHOW_ERROR0( 3, "cannot map buffer with WC" );
		gart->buffer.area = map_physical_memory( "Radeon aligned PCI GART buffer",
			aligned_phys,
			size, B_ANY_KERNEL_BLOCK_ADDRESS,
			B_READ_AREA | B_WRITE_AREA, &gart->buffer.ptr );
	}

	if( gart->buffer.area < 0 ) {
		SHOW_ERROR0( 1, "cannot map GART buffer" );
		delete_area( gart->buffer.unaligned_area );
		gart->buffer.unaligned_area = -1;
		return gart->buffer.area;
	}

	memset( gart->buffer.ptr, 0, size );

	return B_OK;
}
121 
122 #endif
123 
124 // init GATT (could be used for both PCI and AGP)
125 static status_t initGATT( GART_info *gart )
126 {
127 	area_id map_area;
128 	uint32 map_area_size;
129 	physical_entry *map;
130 	physical_entry PTB_map[1];
131 	size_t map_count;
132 	uint32 i;
133 	uint32 *gatt_entry;
134 	size_t num_pages;
135 
136 	SHOW_FLOW0( 3, "" );
137 
138 	num_pages = (gart->buffer.size + B_PAGE_SIZE - 1) & ~(B_PAGE_SIZE - 1);
139 
140 	// GART must be contignuous
141 	gart->GATT.area = create_area( "Radeon GATT", (void **)&gart->GATT.ptr,
142 		B_ANY_KERNEL_ADDRESS,
143 		(num_pages * sizeof( uint32 ) + B_PAGE_SIZE - 1) & ~(B_PAGE_SIZE - 1),
144 		B_CONTIGUOUS, B_READ_AREA | B_WRITE_AREA | B_USER_CLONEABLE_AREA );
145 
146 	if( gart->GATT.area < 0 ) {
147 		SHOW_ERROR( 1, "cannot create GATT table (%s)",
148 			strerror( gart->GATT.area ));
149 		return gart->GATT.area;
150 	}
151 
152 	get_memory_map( gart->GATT.ptr, B_PAGE_SIZE, PTB_map, 1 );
153 	gart->GATT.phys = (uint32)PTB_map[0].address;
154 
155 	SHOW_INFO( 3, "GATT_ptr=%p, GATT_phys=%p", gart->GATT.ptr,
156 		(void *)gart->GATT.phys );
157 
158 	// get address mapping
159 	memset( gart->GATT.ptr, 0, num_pages * sizeof( uint32 ));
160 
161 	map_count = num_pages + 1;
162 
163 	// align size to B_PAGE_SIZE
164 	map_area_size = map_count * sizeof(physical_entry);
165 	if ((map_area_size / B_PAGE_SIZE) * B_PAGE_SIZE != map_area_size)
166 		map_area_size = ((map_area_size / B_PAGE_SIZE) + 1) * B_PAGE_SIZE;
167 
168 	// temporary area where we fill in the memory map (deleted below)
169 	map_area = create_area("pci_gart_map_area", (void **)&map, B_ANY_ADDRESS, map_area_size, B_FULL_LOCK, B_READ_AREA | B_WRITE_AREA);
170 	dprintf("pci_gart_map_area: %ld\n", map_area);
171 
172 	get_memory_map( gart->buffer.ptr, gart->buffer.size, map, map_count );
173 
174 	// the following looks a bit strange as the kernel
175 	// combines successive entries
176 	gatt_entry = gart->GATT.ptr;
177 
178 	for( i = 0; i < map_count; ++i ) {
179 		uint32 addr = (uint32)map[i].address;
180 		size_t size = map[i].size;
181 
182 		if( size == 0 )
183 			break;
184 
185 		while( size > 0 ) {
186 			*gatt_entry++ = addr;
187 			//SHOW_FLOW( 3, "%lx", *(gart_entry-1) );
188 			addr += ATI_PCIGART_PAGE_SIZE;
189 			size -= ATI_PCIGART_PAGE_SIZE;
190 		}
191 	}
192 
193 	delete_area(map_area);
194 
195 	if( i == map_count ) {
196 		// this case should never happen
197 		SHOW_ERROR0( 0, "memory map of GART buffer too large!" );
198 		delete_area( gart->GATT.area );
199 		gart->GATT.area = -1;
200 		return B_ERROR;
201 	}
202 
203 	// this might be a bit more than needed, as
204 	// 1. Intel CPUs have "processor order", i.e. writes appear to external
205 	//    devices in program order, so a simple final write should be sufficient
206 	// 2. if it is a PCI GART, bus snooping should provide cache coherence
207 	// 3. this function is a no-op :(
208 	clear_caches( gart->GATT.ptr, num_pages * sizeof( uint32 ),
209 		B_FLUSH_DCACHE );
210 
211 	// back to real live - some chipsets have write buffers that
212 	// proove all previous assumptions wrong
213 	// (don't know whether this really helps though)
214 	asm volatile ( "wbinvd" ::: "memory" );
215 	return B_OK;
216 }
217 
218 // destroy GART buffer
219 static void destroyGARTBuffer( GART_info *gart )
220 {
221 	if( gart->buffer.area > 0 )
222 		delete_area( gart->buffer.area );
223 
224 	if( gart->buffer.unaligned_area > 0 )
225 		delete_area( gart->buffer.unaligned_area );
226 
227 	gart->buffer.area = gart->buffer.unaligned_area = -1;
228 }
229 
230 
231 // destroy GATT
232 static void destroyGATT( GART_info *gart )
233 {
234 	if( gart->GATT.area > 0 )
235 		delete_area( gart->GATT.area );
236 
237 	gart->GATT.area = -1;
238 }
239 
240 
241 // init PCI GART
242 status_t Radeon_InitPCIGART( device_info *di )
243 {
244 	status_t result;
245 
246 	result = createGARTBuffer( &di->pci_gart, PCI_GART_SIZE );
247 	if( result < 0 )
248 		goto err1;
249 
250 	result = initGATT( &di->pci_gart );
251 	if( result < 0 )
252 		goto err2;
253 
254 	return B_OK;
255 
256 err2:
257 	destroyGARTBuffer( &di->pci_gart );
258 
259 err1:
260 	return result;
261 }
262 
263 
264 // cleanup PCI GART
// cleanup PCI GART: quiesce the card (stop command processor, bus
// mastering and GART translation) before freeing the memory it reads from
void Radeon_CleanupPCIGART( device_info *di )
{
	vuint8 *regs = di->regs;

	SHOW_FLOW0( 3, "" );

	// perhaps we should wait for FIFO space before messing around with registers, but
	// 1. I don't want to add all the sync stuff to the kernel driver
	// 2. I doubt that these regs are buffered by FIFO
	// but still: in worst case CP has written some commands to register FIFO,
	// which can do any kind of nasty things

	// disable CP BM (command processor bus mastering)
	OUTREG( regs, RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIDIS_INDDIS );
	// read-back for flushing the posted write
	INREG( regs, RADEON_CP_CSQ_CNTL );

	// disable bus mastering
	OUTREGP( regs, RADEON_BUS_CNTL, RADEON_BUS_MASTER_DIS, ~RADEON_BUS_MASTER_DIS );
	// disable PCI GART address translation
	OUTREGP( regs, RADEON_AIC_CNTL, 0, ~RADEON_PCIGART_TRANSLATE_EN );

	// only now, with the card quiescent, free the table and the buffer
	destroyGATT( &di->pci_gart );
	destroyGARTBuffer( &di->pci_gart );
}
290