xref: /haiku/src/add-ons/accelerants/radeon/CP.h (revision 2a37e4c1cf59b445e309a3880ceed82d25a0d107)
1 /*
2 	Copyright (c) 2002/03, Thomas Kurschel
3 
4 
5 	Part of Radeon accelerant
6 
7 	Command Processor interface.
8 
9 
10 	Buffer management:
11 
12 	We use both the circular buffer and indirect buffers. To let the CP
13 	execute something, you must allocate an indirect buffer by
14 	Radeon_AllocIndirectBuffer(), fill it, and let it post via
15 	Radeon_SendIndirectBuffer(). If you need some certain state before
16 	your buffer is executed, you can define a state buffer: in this
17 	buffer you write commands necessary to gain your wished state.
18 	You get this state buffer during startup via Radeon_AllocIndirectBuffer()
19 	and release it by Radeon_FreeIndirectBuffer() during shutdown.
20 	Whenever you want to change (or free) it, call
21 	Radeon_InvalidateStateBuffer() to make sure the state buffer is not
22 	in use. Radeon_SendIndirectBuffer() keeps track of the current
23 	state and if it's different from the state necessary for execution
24 	of an indirect buffer, it submits the state buffer first. State
25 	buffers are currently used for virtual cards only, but could be
26 	used for things like 3D accelerator state as well.
27 
28 	All indirect buffers have the same size: 4K (Radeons want them to
29 	be 4k aligned, so this is the minimum size). For 3D this may be too
30 	small, but for 2D it's more than enough. To not waste main memory
31 	(they cannot reside in graphics mem, at least my tests showed that
32 	you get consistency problems), there are currently 253 buffers.
33 	As the ring buffer only contains calls to indirect buffers and
34 	each call needs at most 8 dwords, 2025 dwords would be sufficient.
35 	Currently, the circular buffer holds 4K dwords, which is more
36 	than enough. Perhaps, engine synchronization code will be moved
37 	from indirect to ring buffer to speed things up, in which case
38 	the ring buffer might be too small.
39 
40 	Indirect buffers are recycled if there is none left. To track their
41 	execution, each submitted buffer gets a tag (tags are numbered 0, 1...)
42 	and is put into a list. After execution, the tag is written to scratch
43 	register 1 via CP. The recycler (Radeon_FreeIndirectBuffers())
44 	compares the tags of submitted buffers with scratch register 1 to
45 	detect finished buffers.
46 
47 	When you call any public function, you don't need to own any lock.
48 */
49 
50 #include "cp_regs.h"
51 
52 //status_t Radeon_InitCP( accelerator_info *ai );
53 
54 int Radeon_AllocIndirectBuffer( accelerator_info *ai, bool keep_lock );
55 void Radeon_FreeIndirectBuffer( accelerator_info *ai,
56 	int buffer_idx, bool never_used );
57 void Radeon_SendIndirectBuffer( accelerator_info *ai,
58 	int buffer_idx, int buffer_size,
59 	int state_buffer_idx, int state_buffer_size, bool has_lock );
60 void Radeon_InvalidateStateBuffer( accelerator_info *ai, int state_buffer_idx );
61 void Radeon_FreeIndirectBuffers( accelerator_info *ai );
62 void Radeon_DiscardAllIndirectBuffers( accelerator_info *ai );
63 
64 // get CPU address of indirect buffer
Radeon_GetIndirectBufferPtr(accelerator_info * ai,int buffer_idx)65 static inline uint32 *Radeon_GetIndirectBufferPtr( accelerator_info *ai, int buffer_idx )
66 {
67 	return (uint32 *)(ai->mapped_memory[ai->si->cp.buffers.mem_type].data + ai->si->cp.buffers.mem_offset)
68 		+ buffer_idx * INDIRECT_BUFFER_SIZE;
69 }
70 
// Begin writing into an indirect buffer.
// Opens a brace scope that is left open on purpose; it is closed by the
// matching SUBMIT_IB() or SUBMIT_IB_VC(). Inside that scope it declares:
//	buffer_idx   - index returned by Radeon_AllocIndirectBuffer( ai, true )
//	buffer_start - CPU address of the first dword of that buffer
//	buffer       - write cursor, initially equal to buffer_start
// Expects "ai" to be in scope at the point of use.
#define START_IB() \
	{ \
		int buffer_idx = Radeon_AllocIndirectBuffer( ai, true ); \
		uint32 *buffer_start = Radeon_GetIndirectBufferPtr( ai, buffer_idx ); \
		uint32 *buffer = buffer_start;
79 
// Append a single "write register" command to the indirect buffer.
// Emits two dwords at the write cursor "buffer" (a one-register PACKET0
// header for "reg", then "value") and advances the cursor past them.
// "value" is evaluated before the cursor moves.
#define WRITE_IB_REG( reg, value ) \
	do { \
		buffer[0] = CP_PACKET0( (reg), 1 ); \
		buffer[1] = (value); \
		buffer += 2; \
	} while( 0 )
85 
86 
// Submit the indirect buffer started by START_IB(), specific to a virtual
// card: the state buffer of "vc" is passed along with it.
// The buffer size is the number of dwords written so far
// (buffer - buffer_start). Closes the brace scope opened by START_IB().
// Expects "ai" and "vc" to be in scope.
// (original note: stores tag of last command in engine.count)
#define SUBMIT_IB_VC() \
		Radeon_SendIndirectBuffer( \
			ai, \
			buffer_idx, \
			buffer - buffer_start, \
			vc->state_buffer_idx, \
			vc->state_buffer_size, \
			true ); \
	}
94 
// Submit the indirect buffer started by START_IB(), not specific to a
// virtual card: state buffer index and size are both passed as 0.
// The buffer size is the number of dwords written so far
// (buffer - buffer_start). Closes the brace scope opened by START_IB().
// Expects "ai" to be in scope.
#define SUBMIT_IB() \
		Radeon_SendIndirectBuffer( \
			ai, \
			buffer_idx, \
			buffer - buffer_start, \
			0, \
			0, \
			true ); \
	}
101 
// write PACKET3 header, restricting block count
// 	command - command code
// 	count - wished number of blocks; must be an lvalue, it is reduced by the
// 		number of blocks that fit into this packet
// 	bytes_left - space left in buffer
// 		NOTE(review): despite its name, the arithmetic below compares this
// 		value with a dword limit and subtracts dwords_in_header from it, i.e.
// 		it is treated as a dword count - confirm against callers
// 	dwords_per_block - dwords per block
// 	dwords_in_header - dwords in header (i.e. dwords before the repeating blocks)
//
// the effective count is stored in "sub_count" and subtracted from "count";
// further, the first dword of the packet is written at the write cursor
// "buffer", which is advanced by one dword. "sub_count", "buffer" and min()
// must be in scope at the point of use.
//
// remark: it's taken care of to stay within the size of the buffer and the
// maximum number of dwords per command; the dword count as written into the
// first dword of the header is "size of body(!) in dwords - 1", which means
// "size of packet - 2" - hence the packet size limit of (1 << 14) - 1 + 2
//
// every argument is parenthesized in the expansion, so expressions like
// "a + b" can be passed safely; arguments are still evaluated more than once,
// so they must not have side effects. The expansion is a plain statement
// sequence ending in ';' (kept for compatibility with existing callers) - do
// not use it as the unbraced body of an if/else/loop.
#define WRITE_IB_PACKET3_HEAD( command, count, bytes_left, dwords_per_block, dwords_in_header ) \
	sub_count = min( (count), \
		(min( (bytes_left), (1 << 14) - 1 + 2 ) - (dwords_in_header)) / (dwords_per_block) ); \
	(count) -= sub_count; \
	*buffer++ = (command) | (((sub_count * (dwords_per_block)) + (dwords_in_header) - 2) << 16);
120