xref: /haiku/src/add-ons/kernel/drivers/graphics/radeon/PCI_GART.c (revision e81a954787e50e56a7f06f72705b7859b6ab06d1)
1 /*
2 	Copyright (c) 2002, Thomas Kurschel
3 
4 	Part of Radeon kernel driver
5 
6 	PCI GART.
7 
8 	Currently, we use PCI DMA. Changing to AGP would
9 	only affect this file, but AGP-GART is specific to
10 	the chipset of the motherboard, and as DMA is really
	overkill for 2D, I cannot be bothered to write a dozen
	AGP drivers just to gain a little extra speedup.
13 */
14 
15 
16 #include "radeon_driver.h"
17 #include "mmio.h"
18 #include "buscntrl_regs.h"
19 #include "memcntrl_regs.h"
20 #include "cp_regs.h"
21 
22 #include <image.h>
23 
24 #include <stdlib.h>
25 #include <string.h>
26 
27 
28 #if 1
29 //! create actual GART buffer
30 static status_t
31 createGARTBuffer(GART_info *gart, size_t size)
32 {
33 	SHOW_FLOW0( 3, "" );
34 
35 	gart->buffer.size = size = (size + B_PAGE_SIZE - 1) & ~(B_PAGE_SIZE - 1);
36 
37 	// if this buffer is used for PCI BM, cache snooping
38 	// takes care of syncing memory accesses; if used for AGP,
39 	// we'll have to access via AGP aperture (and mark aperture
40 	// as write-combined) as cache consistency doesn't need to
41 	// be guaranteed
42 
43 	// the specs say that some chipsets do kind of lazy flushing
44 	// so the graphics card may read obsolete data; up to now
45 	// we use PCI only where this shouldn't happen by design;
46 	// if we change to AGP we may tweak the pre-charge time of
47 	// the write buffer pointer
48 
49 	// as some variables in accelerant point directly into
50 	// the DMA buffer, we have to grant access for all apps
51 	gart->buffer.area = create_area("Radeon PCI GART buffer",
52 		&gart->buffer.ptr, B_ANY_KERNEL_ADDRESS,
53 		size, B_FULL_LOCK,
54 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
55 		// TODO: really user read/write?
56 		B_READ_AREA | B_WRITE_AREA | B_USER_CLONEABLE_AREA
57 #else
58 		0
59 #endif
60 		);
61 	if (gart->buffer.area < 0) {
62 		SHOW_ERROR(1, "cannot create PCI GART buffer (%s)",
63 			strerror(gart->buffer.area));
64 		return gart->buffer.area;
65 	}
66 
67 	gart->buffer.unaligned_area = -1;
68 
69 	memset( gart->buffer.ptr, 0, size );
70 
71 	return B_OK;
72 }
73 
74 #else
75 
/*!	Disabled alternative: allocate the GART buffer from physically
	contiguous memory and map an aligned window of it write-combined.

	Kept for a possible AGP variant; the active `#if 1` branch above is
	used instead. Code intentionally left untouched.
*/
static status_t createGARTBuffer( GART_info *gart, size_t size )
{
	physical_entry map[1];
	void *unaligned_addr, *aligned_phys;

	SHOW_FLOW0( 3, "" );

	// round up to page granularity
	gart->buffer.size = size = (size + B_PAGE_SIZE - 1) & ~(B_PAGE_SIZE - 1);

	// we allocate a contiguous area having twice the size
	// to be able to find an aligned, contiguous range within it;
	// the graphics card doesn't care, but the CPU cannot
	// make an arbitrary area WC'ed, at least elder ones
	// question: is this necessary for a PCI GART because of bus snooping?
	gart->buffer.unaligned_area = create_area( "Radeon PCI GART buffer",
		&unaligned_addr, B_ANY_KERNEL_ADDRESS,
		2 * size, B_CONTIGUOUS/*B_FULL_LOCK*/, B_READ_AREA | B_WRITE_AREA | B_USER_CLONEABLE_AREA );
		// TODO: Physical aligning can be done without waste using the
		// private create_area_etc().
	if (gart->buffer.unaligned_area < 0) {
		SHOW_ERROR( 1, "cannot create PCI GART buffer (%s)",
			strerror( gart->buffer.unaligned_area ));
		return gart->buffer.unaligned_area;
	}

	// area is contiguous, so the first page's physical address is the
	// base of the whole allocation
	get_memory_map( unaligned_addr, B_PAGE_SIZE, map, 1 );

	// round the physical base up to the next `size` boundary; this
	// always fits because the allocation is twice as large
	// NOTE(review): the (void **) cast looks like it should be (void *) —
	// the value is a physical address, not a pointer-to-pointer; confirm
	aligned_phys =
		(void **)((map[0].address + size - 1) & ~(size - 1));

	SHOW_FLOW( 3, "aligned_phys=%p", aligned_phys );

	// preferred mapping: write-combined for fast CPU writes
	gart->buffer.area = map_physical_memory( "Radeon aligned PCI GART buffer",
		(addr_t)aligned_phys,
		size, B_ANY_KERNEL_BLOCK_ADDRESS | B_MTR_WC,
		B_READ_AREA | B_WRITE_AREA, &gart->buffer.ptr );

	if( gart->buffer.area < 0 ) {
		// fall back to an uncached/default mapping if WC is unavailable
		SHOW_ERROR0( 3, "cannot map buffer with WC" );
		gart->buffer.area = map_physical_memory( "Radeon aligned PCI GART buffer",
			(addr_t)aligned_phys,
			size, B_ANY_KERNEL_BLOCK_ADDRESS,
			B_READ_AREA | B_WRITE_AREA, &gart->buffer.ptr );
	}

	if( gart->buffer.area < 0 ) {
		SHOW_ERROR0( 1, "cannot map GART buffer" );
		delete_area( gart->buffer.unaligned_area );
		gart->buffer.unaligned_area = -1;
		return gart->buffer.area;
	}

	memset( gart->buffer.ptr, 0, size );

	return B_OK;
}
132 
133 #endif
134 
135 // init GATT (could be used for both PCI and AGP)
136 static status_t initGATT( GART_info *gart )
137 {
138 	area_id map_area;
139 	uint32 map_area_size;
140 	physical_entry *map;
141 	physical_entry PTB_map[1];
142 	size_t map_count;
143 	uint32 i;
144 	uint32 *gatt_entry;
145 	size_t num_pages;
146 
147 	SHOW_FLOW0( 3, "" );
148 
149 	num_pages = (gart->buffer.size + B_PAGE_SIZE - 1) & ~(B_PAGE_SIZE - 1);
150 
151 	// GART must be contiguous
152 	gart->GATT.area = create_area("Radeon GATT", (void **)&gart->GATT.ptr,
153 		B_ANY_KERNEL_ADDRESS,
154 		(num_pages * sizeof( uint32 ) + B_PAGE_SIZE - 1) & ~(B_PAGE_SIZE - 1),
155 		B_32_BIT_CONTIGUOUS,
156 			// TODO: Physical address is cast to 32 bit below! Use B_CONTIGUOUS,
157 			// when that is (/can be) fixed!
158 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
159 		// TODO: really user read/write?
160 		B_READ_AREA | B_WRITE_AREA | B_USER_CLONEABLE_AREA
161 #else
162 		0
163 #endif
164 		);
165 
166 	if (gart->GATT.area < 0) {
167 		SHOW_ERROR(1, "cannot create GATT table (%s)",
168 			strerror(gart->GATT.area));
169 		return gart->GATT.area;
170 	}
171 
172 	get_memory_map(gart->GATT.ptr, B_PAGE_SIZE, PTB_map, 1);
173 	gart->GATT.phys = PTB_map[0].address;
174 
175 	SHOW_INFO(3, "GATT_ptr=%p, GATT_phys=%p", gart->GATT.ptr,
176 		(void *)gart->GATT.phys);
177 
178 	// get address mapping
179 	memset(gart->GATT.ptr, 0, num_pages * sizeof(uint32));
180 
181 	map_count = num_pages + 1;
182 
183 	// align size to B_PAGE_SIZE
184 	map_area_size = map_count * sizeof(physical_entry);
185 	if ((map_area_size / B_PAGE_SIZE) * B_PAGE_SIZE != map_area_size)
186 		map_area_size = ((map_area_size / B_PAGE_SIZE) + 1) * B_PAGE_SIZE;
187 
188 	// temporary area where we fill in the memory map (deleted below)
189 	map_area = create_area("pci_gart_map_area", (void **)&map, B_ANY_ADDRESS,
190 		map_area_size, B_FULL_LOCK, B_READ_AREA | B_WRITE_AREA);
191 		// TODO: We actually have a working malloc() in the kernel. Why create
192 		// an area?
193 	dprintf("pci_gart_map_area: %ld\n", map_area);
194 
195 	get_memory_map( gart->buffer.ptr, gart->buffer.size, map, map_count );
196 
197 	// the following looks a bit strange as the kernel
198 	// combines successive entries
199 	gatt_entry = gart->GATT.ptr;
200 
201 	for( i = 0; i < map_count; ++i ) {
202 		phys_addr_t addr = map[i].address;
203 		size_t size = map[i].size;
204 
205 		if( size == 0 )
206 			break;
207 
208 		while( size > 0 ) {
209 			*gatt_entry++ = addr;
210 			//SHOW_FLOW( 3, "%lx", *(gart_entry-1) );
211 			addr += ATI_PCIGART_PAGE_SIZE;
212 			size -= ATI_PCIGART_PAGE_SIZE;
213 		}
214 	}
215 
216 	delete_area(map_area);
217 
218 	if( i == map_count ) {
219 		// this case should never happen
220 		SHOW_ERROR0( 0, "memory map of GART buffer too large!" );
221 		delete_area( gart->GATT.area );
222 		gart->GATT.area = -1;
223 		return B_ERROR;
224 	}
225 
226 	// this might be a bit more than needed, as
227 	// 1. Intel CPUs have "processor order", i.e. writes appear to external
228 	//    devices in program order, so a simple final write should be sufficient
229 	// 2. if it is a PCI GART, bus snooping should provide cache coherence
230 	// 3. this function is a no-op :(
231 	clear_caches( gart->GATT.ptr, num_pages * sizeof( uint32 ),
232 		B_FLUSH_DCACHE );
233 
234 	// back to real live - some chipsets have write buffers that
235 	// proove all previous assumptions wrong
236 	// (don't know whether this really helps though)
237 	#if defined(__INTEL__)
238 	asm volatile ( "wbinvd" ::: "memory" );
239 	#elif defined(__POWERPC__)
240 	// TODO : icbi on PowerPC to flush instruction cache?
241 	#endif
242 	return B_OK;
243 }
244 
245 // destroy GART buffer
246 static void destroyGARTBuffer( GART_info *gart )
247 {
248 	if( gart->buffer.area > 0 )
249 		delete_area( gart->buffer.area );
250 
251 	if( gart->buffer.unaligned_area > 0 )
252 		delete_area( gart->buffer.unaligned_area );
253 
254 	gart->buffer.area = gart->buffer.unaligned_area = -1;
255 }
256 
257 
258 // destroy GATT
259 static void destroyGATT( GART_info *gart )
260 {
261 	if( gart->GATT.area > 0 )
262 		delete_area( gart->GATT.area );
263 
264 	gart->GATT.area = -1;
265 }
266 
267 
268 // init PCI GART
269 status_t Radeon_InitPCIGART( device_info *di )
270 {
271 	status_t result;
272 
273 	result = createGARTBuffer( &di->pci_gart, PCI_GART_SIZE );
274 	if( result < 0 )
275 		goto err1;
276 
277 	result = initGATT( &di->pci_gart );
278 	if( result < 0 )
279 		goto err2;
280 
281 	return B_OK;
282 
283 err2:
284 	destroyGARTBuffer( &di->pci_gart );
285 
286 err1:
287 	return result;
288 }
289 
290 
/*!	Cleanup the PCI GART: quiesce the card's bus-master engine, then
	free the GATT and the DMA buffer.

	The register writes must happen before the memory is freed, or the
	card could still DMA into released pages.
*/
void Radeon_CleanupPCIGART( device_info *di )
{
	vuint8 *regs = di->regs;

	SHOW_FLOW0( 3, "" );

	// perhaps we should wait for FIFO space before messing around with registers, but
	// 1. I don't want to add all the sync stuff to the kernel driver
	// 2. I doubt that these regs are buffered by FIFO
	// but still: in worst case CP has written some commands to register FIFO,
	// which can do any kind of nasty things

	// disable CP bus mastering (primary and indirect queues)
	OUTREG( regs, RADEON_CP_CSQ_CNTL, RADEON_CSQ_PRIDIS_INDDIS );
	// read-back for flushing the write above to the card
	INREG( regs, RADEON_CP_CSQ_CNTL );

	// disable bus mastering
	OUTREGP( regs, RADEON_BUS_CNTL, RADEON_BUS_MASTER_DIS, ~RADEON_BUS_MASTER_DIS );
	// disable PCI GART address translation
	OUTREGP( regs, RADEON_AIC_CNTL, 0, ~RADEON_PCIGART_TRANSLATE_EN );

	// only now is it safe to release the table and the buffer
	destroyGATT( &di->pci_gart );
	destroyGARTBuffer( &di->pci_gart );
}
317