xref: /haiku/src/add-ons/media/plugins/ffmpeg/gfx_conv_mmx.cpp (revision e81a954787e50e56a7f06f72705b7859b6ab06d1)
1 #include "gfx_conv_mmx.h"
2 #include "gfx_conv_c.h"
3 
// Packed
// Row converters implemented outside this file; extern "C" gives them C
// linkage. Every call in this file passes the start of one source row, the
// start of one destination row and the frame width.
// NOTE(review): the names suggest packed YUV422 -> RGBA32 with SSE, SSE2 and
// SSSE3 code paths; the implementations are not visible here - confirm.
extern "C" void _Convert_YUV422_RGBA32_SSE(void *fromYPtr, void *toPtr,
	int width);
extern "C" void _Convert_YUV422_RGBA32_SSE2(void *fromYPtr, void *toPtr,
	int width);
extern "C" void _Convert_YUV422_RGBA32_SSSE3(void *fromYPtr, void *toPtr,
	int width);
11 
// Planar
// Row converters implemented outside this file; extern "C" gives them C
// linkage. Every call in this file passes the start of one row in each of
// the three source planes, the start of one destination row and the frame
// width. They are used for both the YUV420P and the YUV422P frame converters
// below, which differ only in how often the U and V row pointers advance.
// NOTE(review): the names suggest SSE, SSE2 and SSSE3 code paths; the
// implementations are not visible here - confirm.
extern "C" void _Convert_YUV420P_RGBA32_SSE(void *fromYPtr, void *fromUPtr,
	void *fromVPtr, void *toPtr, int width);
extern "C" void _Convert_YUV420P_RGBA32_SSE2(void *fromYPtr, void *fromUPtr,
	void *fromVPtr, void *toPtr, int width);
extern "C" void _Convert_YUV420P_RGBA32_SSSE3(void *fromYPtr, void *fromUPtr,
	void *fromVPtr, void *toPtr, int width);
19 
20 
21 // Planar YUV420 means 2 Y lines share a UV line
22 void
23 gfx_conv_yuv420p_rgba32_sse(AVFrame *in, AVFrame *out, int width, int height)
24 {
25 	// in and out buffers must be aligned to 16 bytes,
26 	// in should be as ffmpeg allocates it
27 	if ((off_t)out->data[0] % 16 != 0) {
28 		gfx_conv_YCbCr420p_RGB32_c(in, out, width, height);
29 		return;
30 	}
31 
32 	uint8 *ybase = (uint8 *)in->data[0];
33 	uint8 *ubase = (uint8 *)in->data[1];
34 	uint8 *vbase = (uint8 *)in->data[2];
35 	uint8 *rgbbase = (uint8 *)out->data[0];
36 
37 	int yBaseInc = in->linesize[0];
38 	int uBaseInc = in->linesize[1];
39 	int vBaseInc = in->linesize[2];
40 	int rgbBaseInc = out->linesize[0];
41 
42 	for (int i=0;i<height;i+=2) {
43 		// First Y row
44 		_Convert_YUV420P_RGBA32_SSE(ybase, ubase, vbase, rgbbase, width);
45 		ybase += yBaseInc;
46 		rgbbase += rgbBaseInc;
47 
48 		// Second Y row but same u and v row
49 		_Convert_YUV420P_RGBA32_SSE(ybase, ubase, vbase, rgbbase, width);
50 		ybase += yBaseInc;
51 		ubase += uBaseInc;
52 		vbase += vBaseInc;
53 		rgbbase += rgbBaseInc;
54 	}
55 }
56 
57 // Planar YUV420 means 2 Y lines share a UV line
58 void
59 gfx_conv_yuv420p_rgba32_sse2(AVFrame *in, AVFrame *out, int width, int height)
60 {
61 	// in and out buffers must be aligned to 32 bytes,
62 	// in should be as ffmpeg allocates it
63 	if ((off_t)out->data[0] % 32 != 0) {
64 		gfx_conv_YCbCr420p_RGB32_c(in, out, width, height);
65 		return;
66 	}
67 
68 	uint8 *ybase = (uint8 *)in->data[0];
69 	uint8 *ubase = (uint8 *)in->data[1];
70 	uint8 *vbase = (uint8 *)in->data[2];
71 	uint8 *rgbbase = (uint8 *)out->data[0];
72 
73 	int yBaseInc = in->linesize[0];
74 	int uBaseInc = in->linesize[1];
75 	int vBaseInc = in->linesize[2];
76 	int rgbBaseInc = out->linesize[0];
77 
78 	for (int i=0;i<height;i+=2) {
79 		// First Y row
80 		_Convert_YUV420P_RGBA32_SSE2(ybase, ubase, vbase, rgbbase, width);
81 		ybase += yBaseInc;
82 		rgbbase += rgbBaseInc;
83 
84 		// Second Y row but same u and v row
85 		_Convert_YUV420P_RGBA32_SSE2(ybase, ubase, vbase, rgbbase, width);
86 		ybase += yBaseInc;
87 		ubase += uBaseInc;
88 		vbase += vBaseInc;
89 		rgbbase += rgbBaseInc;
90 	}
91 }
92 
93 // Planar YUV420 means 2 Y lines share a UV line
94 void
95 gfx_conv_yuv420p_rgba32_ssse3(AVFrame *in, AVFrame *out, int width, int height)
96 {
97 	// in and out buffers must be aligned to 32 bytes,
98 	// in should be as ffmpeg allocates it
99 	if ((off_t)out->data[0] % 32 != 0) {
100 		gfx_conv_YCbCr420p_RGB32_c(in, out, width, height);
101 		return;
102 	}
103 
104 	uint8 *ybase = (uint8 *)in->data[0];
105 	uint8 *ubase = (uint8 *)in->data[1];
106 	uint8 *vbase = (uint8 *)in->data[2];
107 	uint8 *rgbbase = (uint8 *)out->data[0];
108 
109 	int yBaseInc = in->linesize[0];
110 	int uBaseInc = in->linesize[1];
111 	int vBaseInc = in->linesize[2];
112 	int rgbBaseInc = out->linesize[0];
113 
114 	for (int i=0;i<height;i+=2) {
115 		// First Y row
116 		_Convert_YUV420P_RGBA32_SSSE3(ybase, ubase, vbase, rgbbase, width);
117 		ybase += yBaseInc;
118 		rgbbase += rgbBaseInc;
119 
120 		// Second Y row but same u and v row
121 		_Convert_YUV420P_RGBA32_SSSE3(ybase, ubase, vbase, rgbbase, width);
122 		ybase += yBaseInc;
123 		ubase += uBaseInc;
124 		vbase += vBaseInc;
125 		rgbbase += rgbBaseInc;
126 	}
127 }
128 
129 // Planar YUV422 means each Y line has it's own UV line
130 void
131 gfx_conv_yuv422p_rgba32_sse(AVFrame *in, AVFrame *out, int width, int height)
132 {
133 	// in and out buffers must be aligned to 32 bytes,
134 	// in should be as ffmpeg allocates it
135 	if ((off_t)out->data[0] % 32 != 0) {
136 		gfx_conv_YCbCr422_RGB32_c(in, out, width, height);
137 		return;
138 	}
139 
140 	uint8 *ybase = (uint8 *)in->data[0];
141 	uint8 *ubase = (uint8 *)in->data[1];
142 	uint8 *vbase = (uint8 *)in->data[2];
143 	uint8 *rgbbase = (uint8 *)out->data[0];
144 
145 	int yBaseInc = in->linesize[0];
146 	int uBaseInc = in->linesize[1];
147 	int vBaseInc = in->linesize[2];
148 	int rgbBaseInc = out->linesize[0];
149 
150 	for (int i=0;i<height;i++) {
151 		_Convert_YUV420P_RGBA32_SSE(ybase, ubase, vbase, rgbbase, width);
152 		ybase += yBaseInc;
153 		ubase += uBaseInc;
154 		vbase += vBaseInc;
155 		rgbbase += rgbBaseInc;
156 	}
157 }
158 
159 // Planar YUV422 means each Y line has it's own UV line
160 void
161 gfx_conv_yuv422p_rgba32_sse2(AVFrame *in, AVFrame *out, int width, int height)
162 {
163 	// in and out buffers must be aligned to 32 bytes,
164 	// in should be as ffmpeg allocates it
165 	if ((off_t)out->data[0] % 32 != 0) {
166 		gfx_conv_YCbCr422_RGB32_c(in, out, width, height);
167 		return;
168 	}
169 
170 	uint8 *ybase = (uint8 *)in->data[0];
171 	uint8 *ubase = (uint8 *)in->data[1];
172 	uint8 *vbase = (uint8 *)in->data[2];
173 	uint8 *rgbbase = (uint8 *)out->data[0];
174 
175 	int yBaseInc = in->linesize[0];
176 	int uBaseInc = in->linesize[1];
177 	int vBaseInc = in->linesize[2];
178 	int rgbBaseInc = out->linesize[0];
179 
180 	for (int i=0;i<height;i++) {
181 		_Convert_YUV420P_RGBA32_SSE2(ybase, ubase, vbase, rgbbase, width);
182 		ybase += yBaseInc;
183 		ubase += uBaseInc;
184 		vbase += vBaseInc;
185 		rgbbase += rgbBaseInc;
186 	}
187 }
188 
189 // Planar YUV422 means each Y line has it's own UV line
190 void
191 gfx_conv_yuv422p_rgba32_ssse3(AVFrame *in, AVFrame *out, int width, int height)
192 {
193 	// in and out buffers must be aligned to 32 bytes,
194 	// in should be as ffmpeg allocates it
195 	if ((off_t)out->data[0] % 32 != 0) {
196 		gfx_conv_YCbCr422_RGB32_c(in, out, width, height);
197 		return;
198 	}
199 
200 	uint8 *ybase = (uint8 *)in->data[0];
201 	uint8 *ubase = (uint8 *)in->data[1];
202 	uint8 *vbase = (uint8 *)in->data[2];
203 	uint8 *rgbbase = (uint8 *)out->data[0];
204 
205 	int yBaseInc = in->linesize[0];
206 	int uBaseInc = in->linesize[1];
207 	int vBaseInc = in->linesize[2];
208 	int rgbBaseInc = out->linesize[0];
209 
210 	for (int i=0;i<height;i++) {
211 		_Convert_YUV420P_RGBA32_SSSE3(ybase, ubase, vbase, rgbbase, width);
212 		ybase += yBaseInc;
213 		ubase += uBaseInc;
214 		vbase += vBaseInc;
215 		rgbbase += rgbBaseInc;
216 	}
217 }
218 
219 // Packed YUV422 (YUYV)
220 void
221 gfx_conv_yuv422_rgba32_sse(AVFrame *in, AVFrame *out, int width, int height)
222 {
223 	// in and out buffers must be aligned to 16 bytes,
224 	// in should be as ffmpeg allocates it
225 	if ((off_t)out->data[0] % 16 != 0) {
226 		gfx_conv_YCbCr422_RGB32_c(in, out, width, height);
227 		return;
228 	}
229 
230 	uint8 *ybase = (uint8 *)in->data[0];
231 	uint8 *rgbbase = (uint8 *)out->data[0];
232 
233 	for (int i = 0; i <= height; i++) {
234 		_Convert_YUV422_RGBA32_SSE(ybase, rgbbase, width);
235 		ybase += in->linesize[0];
236 		rgbbase += out->linesize[0];
237 	}
238 }
239 
240 // Packed YUV422 (YUYV)
241 void
242 gfx_conv_yuv422_rgba32_sse2(AVFrame *in, AVFrame *out, int width, int height)
243 {
244 	// in and out buffers must be aligned to 32 bytes,
245 	// in should be as ffmpeg allocates it
246 	if ((off_t)out->data[0] % 32 != 0) {
247 		gfx_conv_YCbCr422_RGB32_c(in, out, width, height);
248 		return;
249 	}
250 
251 	uint8 *ybase = (uint8 *)in->data[0];
252 	uint8 *rgbbase = (uint8 *)out->data[0];
253 
254 	for (int i = 0; i <= height; i++) {
255 		_Convert_YUV422_RGBA32_SSE2(ybase, rgbbase, width);
256 		ybase += in->linesize[0];
257 		rgbbase += out->linesize[0];
258 	}
259 }
260 
261 // Packed YUV422 (YUYV)
262 void
263 gfx_conv_yuv422_rgba32_ssse3(AVFrame *in, AVFrame *out, int width, int height)
264 {
265 	// in and out buffers must be aligned to 32 bytes,
266 	// in should be as ffmpeg allocates it
267 	if ((off_t)out->data[0] % 32 != 0) {
268 		gfx_conv_YCbCr422_RGB32_c(in, out, width, height);
269 		return;
270 	}
271 
272 	uint8 *ybase = (uint8 *)in->data[0];
273 	uint8 *rgbbase = (uint8 *)out->data[0];
274 
275 	for (int i = 0; i <= height; i++) {
276 		_Convert_YUV422_RGBA32_SSSE3(ybase, rgbbase, width);
277 		ybase += in->linesize[0];
278 		rgbbase += out->linesize[0];
279 	}
280 }
281