1 #include "gfx_conv_mmx.h"
2 #include "gfx_conv_c.h"
3
// Packed YUV422 row converters. They are only declared here (C linkage);
// the wrappers in this file call them once per image row with the source
// row, the destination row and the width.
extern "C" {
void _Convert_YUV422_RGBA32_SSE(void *fromYPtr, void *toPtr, int width);
void _Convert_YUV422_RGBA32_SSE2(void *fromYPtr, void *toPtr, int width);
void _Convert_YUV422_RGBA32_SSSE3(void *fromYPtr, void *toPtr, int width);
}
11
// Planar row converters. They are only declared here (C linkage); the
// wrappers in this file call them once per image row with separate Y, U
// and V row pointers, the destination row and the width.
extern "C" {
void _Convert_YUV420P_RGBA32_SSE(void *fromYPtr, void *fromUPtr,
	void *fromVPtr, void *toPtr, int width);
void _Convert_YUV420P_RGBA32_SSE2(void *fromYPtr, void *fromUPtr,
	void *fromVPtr, void *toPtr, int width);
void _Convert_YUV420P_RGBA32_SSSE3(void *fromYPtr, void *fromUPtr,
	void *fromVPtr, void *toPtr, int width);
}
19
20
21 // Planar YUV420 means 2 Y lines share a UV line
22 void
gfx_conv_yuv420p_rgba32_sse(AVFrame * in,AVFrame * out,int width,int height)23 gfx_conv_yuv420p_rgba32_sse(AVFrame *in, AVFrame *out, int width, int height)
24 {
25 // in and out buffers must be aligned to 16 bytes,
26 // in should be as ffmpeg allocates it
27 if ((off_t)out->data[0] % 16 != 0) {
28 gfx_conv_YCbCr420p_RGB32_c(in, out, width, height);
29 return;
30 }
31
32 uint8 *ybase = (uint8 *)in->data[0];
33 uint8 *ubase = (uint8 *)in->data[1];
34 uint8 *vbase = (uint8 *)in->data[2];
35 uint8 *rgbbase = (uint8 *)out->data[0];
36
37 int yBaseInc = in->linesize[0];
38 int uBaseInc = in->linesize[1];
39 int vBaseInc = in->linesize[2];
40 int rgbBaseInc = out->linesize[0];
41
42 for (int i=0;i<height;i+=2) {
43 // First Y row
44 _Convert_YUV420P_RGBA32_SSE(ybase, ubase, vbase, rgbbase, width);
45 ybase += yBaseInc;
46 rgbbase += rgbBaseInc;
47
48 // Second Y row but same u and v row
49 _Convert_YUV420P_RGBA32_SSE(ybase, ubase, vbase, rgbbase, width);
50 ybase += yBaseInc;
51 ubase += uBaseInc;
52 vbase += vBaseInc;
53 rgbbase += rgbBaseInc;
54 }
55 }
56
57 // Planar YUV420 means 2 Y lines share a UV line
58 void
gfx_conv_yuv420p_rgba32_sse2(AVFrame * in,AVFrame * out,int width,int height)59 gfx_conv_yuv420p_rgba32_sse2(AVFrame *in, AVFrame *out, int width, int height)
60 {
61 // in and out buffers must be aligned to 32 bytes,
62 // in should be as ffmpeg allocates it
63 if ((off_t)out->data[0] % 32 != 0) {
64 gfx_conv_YCbCr420p_RGB32_c(in, out, width, height);
65 return;
66 }
67
68 uint8 *ybase = (uint8 *)in->data[0];
69 uint8 *ubase = (uint8 *)in->data[1];
70 uint8 *vbase = (uint8 *)in->data[2];
71 uint8 *rgbbase = (uint8 *)out->data[0];
72
73 int yBaseInc = in->linesize[0];
74 int uBaseInc = in->linesize[1];
75 int vBaseInc = in->linesize[2];
76 int rgbBaseInc = out->linesize[0];
77
78 for (int i=0;i<height;i+=2) {
79 // First Y row
80 _Convert_YUV420P_RGBA32_SSE2(ybase, ubase, vbase, rgbbase, width);
81 ybase += yBaseInc;
82 rgbbase += rgbBaseInc;
83
84 // Second Y row but same u and v row
85 _Convert_YUV420P_RGBA32_SSE2(ybase, ubase, vbase, rgbbase, width);
86 ybase += yBaseInc;
87 ubase += uBaseInc;
88 vbase += vBaseInc;
89 rgbbase += rgbBaseInc;
90 }
91 }
92
93 // Planar YUV420 means 2 Y lines share a UV line
94 void
gfx_conv_yuv420p_rgba32_ssse3(AVFrame * in,AVFrame * out,int width,int height)95 gfx_conv_yuv420p_rgba32_ssse3(AVFrame *in, AVFrame *out, int width, int height)
96 {
97 // in and out buffers must be aligned to 32 bytes,
98 // in should be as ffmpeg allocates it
99 if ((off_t)out->data[0] % 32 != 0) {
100 gfx_conv_YCbCr420p_RGB32_c(in, out, width, height);
101 return;
102 }
103
104 uint8 *ybase = (uint8 *)in->data[0];
105 uint8 *ubase = (uint8 *)in->data[1];
106 uint8 *vbase = (uint8 *)in->data[2];
107 uint8 *rgbbase = (uint8 *)out->data[0];
108
109 int yBaseInc = in->linesize[0];
110 int uBaseInc = in->linesize[1];
111 int vBaseInc = in->linesize[2];
112 int rgbBaseInc = out->linesize[0];
113
114 for (int i=0;i<height;i+=2) {
115 // First Y row
116 _Convert_YUV420P_RGBA32_SSSE3(ybase, ubase, vbase, rgbbase, width);
117 ybase += yBaseInc;
118 rgbbase += rgbBaseInc;
119
120 // Second Y row but same u and v row
121 _Convert_YUV420P_RGBA32_SSSE3(ybase, ubase, vbase, rgbbase, width);
122 ybase += yBaseInc;
123 ubase += uBaseInc;
124 vbase += vBaseInc;
125 rgbbase += rgbBaseInc;
126 }
127 }
128
129 // Planar YUV422 means each Y line has it's own UV line
130 void
gfx_conv_yuv422p_rgba32_sse(AVFrame * in,AVFrame * out,int width,int height)131 gfx_conv_yuv422p_rgba32_sse(AVFrame *in, AVFrame *out, int width, int height)
132 {
133 // in and out buffers must be aligned to 32 bytes,
134 // in should be as ffmpeg allocates it
135 if ((off_t)out->data[0] % 32 != 0) {
136 gfx_conv_YCbCr422_RGB32_c(in, out, width, height);
137 return;
138 }
139
140 uint8 *ybase = (uint8 *)in->data[0];
141 uint8 *ubase = (uint8 *)in->data[1];
142 uint8 *vbase = (uint8 *)in->data[2];
143 uint8 *rgbbase = (uint8 *)out->data[0];
144
145 int yBaseInc = in->linesize[0];
146 int uBaseInc = in->linesize[1];
147 int vBaseInc = in->linesize[2];
148 int rgbBaseInc = out->linesize[0];
149
150 for (int i=0;i<height;i++) {
151 _Convert_YUV420P_RGBA32_SSE(ybase, ubase, vbase, rgbbase, width);
152 ybase += yBaseInc;
153 ubase += uBaseInc;
154 vbase += vBaseInc;
155 rgbbase += rgbBaseInc;
156 }
157 }
158
159 // Planar YUV422 means each Y line has it's own UV line
160 void
gfx_conv_yuv422p_rgba32_sse2(AVFrame * in,AVFrame * out,int width,int height)161 gfx_conv_yuv422p_rgba32_sse2(AVFrame *in, AVFrame *out, int width, int height)
162 {
163 // in and out buffers must be aligned to 32 bytes,
164 // in should be as ffmpeg allocates it
165 if ((off_t)out->data[0] % 32 != 0) {
166 gfx_conv_YCbCr422_RGB32_c(in, out, width, height);
167 return;
168 }
169
170 uint8 *ybase = (uint8 *)in->data[0];
171 uint8 *ubase = (uint8 *)in->data[1];
172 uint8 *vbase = (uint8 *)in->data[2];
173 uint8 *rgbbase = (uint8 *)out->data[0];
174
175 int yBaseInc = in->linesize[0];
176 int uBaseInc = in->linesize[1];
177 int vBaseInc = in->linesize[2];
178 int rgbBaseInc = out->linesize[0];
179
180 for (int i=0;i<height;i++) {
181 _Convert_YUV420P_RGBA32_SSE2(ybase, ubase, vbase, rgbbase, width);
182 ybase += yBaseInc;
183 ubase += uBaseInc;
184 vbase += vBaseInc;
185 rgbbase += rgbBaseInc;
186 }
187 }
188
189 // Planar YUV422 means each Y line has it's own UV line
190 void
gfx_conv_yuv422p_rgba32_ssse3(AVFrame * in,AVFrame * out,int width,int height)191 gfx_conv_yuv422p_rgba32_ssse3(AVFrame *in, AVFrame *out, int width, int height)
192 {
193 // in and out buffers must be aligned to 32 bytes,
194 // in should be as ffmpeg allocates it
195 if ((off_t)out->data[0] % 32 != 0) {
196 gfx_conv_YCbCr422_RGB32_c(in, out, width, height);
197 return;
198 }
199
200 uint8 *ybase = (uint8 *)in->data[0];
201 uint8 *ubase = (uint8 *)in->data[1];
202 uint8 *vbase = (uint8 *)in->data[2];
203 uint8 *rgbbase = (uint8 *)out->data[0];
204
205 int yBaseInc = in->linesize[0];
206 int uBaseInc = in->linesize[1];
207 int vBaseInc = in->linesize[2];
208 int rgbBaseInc = out->linesize[0];
209
210 for (int i=0;i<height;i++) {
211 _Convert_YUV420P_RGBA32_SSSE3(ybase, ubase, vbase, rgbbase, width);
212 ybase += yBaseInc;
213 ubase += uBaseInc;
214 vbase += vBaseInc;
215 rgbbase += rgbBaseInc;
216 }
217 }
218
219 // Packed YUV422 (YUYV)
220 void
gfx_conv_yuv422_rgba32_sse(AVFrame * in,AVFrame * out,int width,int height)221 gfx_conv_yuv422_rgba32_sse(AVFrame *in, AVFrame *out, int width, int height)
222 {
223 // in and out buffers must be aligned to 16 bytes,
224 // in should be as ffmpeg allocates it
225 if ((off_t)out->data[0] % 16 != 0) {
226 gfx_conv_YCbCr422_RGB32_c(in, out, width, height);
227 return;
228 }
229
230 uint8 *ybase = (uint8 *)in->data[0];
231 uint8 *rgbbase = (uint8 *)out->data[0];
232
233 for (int i = 0; i <= height; i++) {
234 _Convert_YUV422_RGBA32_SSE(ybase, rgbbase, width);
235 ybase += in->linesize[0];
236 rgbbase += out->linesize[0];
237 }
238 }
239
240 // Packed YUV422 (YUYV)
241 void
gfx_conv_yuv422_rgba32_sse2(AVFrame * in,AVFrame * out,int width,int height)242 gfx_conv_yuv422_rgba32_sse2(AVFrame *in, AVFrame *out, int width, int height)
243 {
244 // in and out buffers must be aligned to 32 bytes,
245 // in should be as ffmpeg allocates it
246 if ((off_t)out->data[0] % 32 != 0) {
247 gfx_conv_YCbCr422_RGB32_c(in, out, width, height);
248 return;
249 }
250
251 uint8 *ybase = (uint8 *)in->data[0];
252 uint8 *rgbbase = (uint8 *)out->data[0];
253
254 for (int i = 0; i <= height; i++) {
255 _Convert_YUV422_RGBA32_SSE2(ybase, rgbbase, width);
256 ybase += in->linesize[0];
257 rgbbase += out->linesize[0];
258 }
259 }
260
261 // Packed YUV422 (YUYV)
262 void
gfx_conv_yuv422_rgba32_ssse3(AVFrame * in,AVFrame * out,int width,int height)263 gfx_conv_yuv422_rgba32_ssse3(AVFrame *in, AVFrame *out, int width, int height)
264 {
265 // in and out buffers must be aligned to 32 bytes,
266 // in should be as ffmpeg allocates it
267 if ((off_t)out->data[0] % 32 != 0) {
268 gfx_conv_YCbCr422_RGB32_c(in, out, width, height);
269 return;
270 }
271
272 uint8 *ybase = (uint8 *)in->data[0];
273 uint8 *rgbbase = (uint8 *)out->data[0];
274
275 for (int i = 0; i <= height; i++) {
276 _Convert_YUV422_RGBA32_SSSE3(ybase, rgbbase, width);
277 ybase += in->linesize[0];
278 rgbbase += out->linesize[0];
279 }
280 }
281