xref: /haiku/src/servers/app/drawing/Painter/bitmap_painter/DrawBitmapBilinear.h (revision da8162be21b36442f34a731873d2358a0d63c25a)
/*
 * Copyright 2009, Christian Packmann.
 * Copyright 2008, Andrej Spielmann <andrej.spielmann@seh.ox.ac.uk>.
 * Copyright 2005-2014, Stephan Aßmus <superstippi@gmx.de>.
 * Copyright 2015, Julian Harnath <julian.harnath@rwth-aachen.de>
 * All rights reserved. Distributed under the terms of the MIT License.
 */
8 #ifndef DRAW_BITMAP_BILINEAR_H
9 #define DRAW_BITMAP_BILINEAR_H
10 
11 #include "Painter.h"
12 
13 #include <typeinfo>
14 
15 
16 // Prototypes for assembler routines
17 extern "C" {
18 	void bilinear_scale_xloop_mmxsse(const uint8* src, void* dst,
19 		void* xWeights, uint32 xmin, uint32 xmax, uint32 wTop, uint32 srcBPR);
20 }
21 
22 
23 extern uint32 gSIMDFlags;
24 
25 
26 namespace BitmapPainterPrivate {
27 
28 
// One entry of a precomputed filter table. There is one entry per
// destination column (X table) and one per destination row (Y table).
// NOTE: DrawBitmapBilinear::Draw() stores the X table indices already
// multiplied by 4, so they are byte offsets into a 32 bit pixel row; the
// Y table indices are plain row numbers.
struct FilterInfo {
	uint16 index;	// index into source bitmap row/column
	uint16 weight;	// weight of the pixel at index [0..255]
};
33 
34 
35 struct FilterData {
36 	FilterInfo* fWeightsX;
37 	FilterInfo* fWeightsY;
38 	uint32 fIndexOffsetX;
39 	uint32 fIndexOffsetY;
40 };
41 
42 
43 template<class OptimizedVersion>
44 struct DrawBitmapBilinearOptimized {
DrawDrawBitmapBilinearOptimized45 	void Draw(PainterAggInterface& aggInterface, const BRect& destinationRect,
46 		agg::rendering_buffer* bitmap, const FilterData& filterData)
47 	{
48 		fSource = bitmap;
49 		fSourceBytesPerRow = bitmap->stride();
50 		fDestination = NULL;
51 		fDestinationBytesPerRow = aggInterface.fBuffer.stride();
52 		fWeightsX = filterData.fWeightsX;
53 		fWeightsY = filterData.fWeightsY;
54 
55 		const int32 left = (int32)destinationRect.left;
56 		const int32 top = (int32)destinationRect.top;
57 		const int32 right = (int32)destinationRect.right;
58 		const int32 bottom = (int32)destinationRect.bottom;
59 
60 		renderer_base& baseRenderer = aggInterface.fBaseRenderer;
61 
62 		// iterate over clipping boxes
63 		baseRenderer.first_clip_box();
64 		do {
65 			const int32 x1 = max_c(baseRenderer.xmin(), left);
66 			const int32 x2 = min_c(baseRenderer.xmax(), right);
67 			if (x1 > x2)
68 				continue;
69 
70 			int32 y1 = max_c(baseRenderer.ymin(), top);
71 			int32 y2 = min_c(baseRenderer.ymax(), bottom);
72 			if (y1 > y2)
73 				continue;
74 
75 			// buffer offset into destination
76 			fDestination = aggInterface.fBuffer.row_ptr(y1) + x1 * 4;
77 
78 			// x and y are needed as indices into the weight arrays, so the
79 			// offset into the target buffer needs to be compensated
80 			const int32 xIndexL = x1 - left - filterData.fIndexOffsetX;
81 			const int32 xIndexR = x2 - left - filterData.fIndexOffsetX;
82 			y1 -= top + filterData.fIndexOffsetY;
83 			y2 -= top + filterData.fIndexOffsetY;
84 
85 			//printf("x: %ld - %ld\n", xIndexL, xIndexR);
86 			//printf("y: %ld - %ld\n", y1, y2);
87 
88 			static_cast<OptimizedVersion*>(this)->DrawToClipRect(
89 				xIndexL, xIndexR, y1, y2);
90 
91 		} while (baseRenderer.next_clip_box());
92 	}
93 
94 protected:
95 	agg::rendering_buffer*	fSource;
96 	uint32					fSourceBytesPerRow;
97 	uint8*					fDestination;
98 	uint32					fDestinationBytesPerRow;
99 	FilterInfo*				fWeightsX;
100 	FilterInfo*				fWeightsY;
101 };
102 
103 
// Interpolation policy for sources without alpha channel: only the first
// three bytes of every 4 byte pixel are read, and only t[0]..t[2] are
// written. All weights are expected in the range [0..255].
struct ColorTypeRgb {
	// Weighted sum of the 2x2 pixel block starting at s.
	//	t					receives the three interpolated components
	//	s					left pixel of the top row
	//	sourceBytesPerRow	distance in bytes from s to the pixel below it
	//	wLeft/wRight		horizontal weights
	//	wTop/wBottom		vertical weights
	// The two weight multiplications are undone by the final shift by 16.
	static void
	Interpolate(uint32* t, const uint8* s, uint32 sourceBytesPerRow,
		uint16 wLeft, uint16 wTop, uint16 wRight, uint16 wBottom)
	{
		// left and right of top row
		t[0] = (s[0] * wLeft + s[4] * wRight) * wTop;
		t[1] = (s[1] * wLeft + s[5] * wRight) * wTop;
		t[2] = (s[2] * wLeft + s[6] * wRight) * wTop;

		// left and right of bottom row
		s += sourceBytesPerRow;
		t[0] += (s[0] * wLeft + s[4] * wRight) * wBottom;
		t[1] += (s[1] * wLeft + s[5] * wRight) * wBottom;
		t[2] += (s[2] * wLeft + s[6] * wRight) * wBottom;

		t[0] >>= 16;
		t[1] >>= 16;
		t[2] >>= 16;
	}

	// Vertical-only interpolation between pixel s and the pixel sBottom
	// below it; no pixel to the right of s is read.
	static void
	InterpolateLastColumn(uint32* t, const uint8* s, const uint8* sBottom,
		uint16 wTop, uint16 wBottom)
	{
		t[0] = (s[0] * wTop + sBottom[0] * wBottom) >> 8;
		t[1] = (s[1] * wTop + sBottom[1] * wBottom) >> 8;
		t[2] = (s[2] * wTop + sBottom[2] * wBottom) >> 8;
	}

	// Horizontal-only interpolation between pixel s and its right
	// neighbor; no pixel below s is read.
	static void
	InterpolateLastRow(uint32* t, const uint8* s, uint16 wLeft,
		uint16 wRight)
	{
		t[0] = (s[0] * wLeft + s[4] * wRight) >> 8;
		t[1] = (s[1] * wLeft + s[5] * wRight) >> 8;
		t[2] = (s[2] * wLeft + s[6] * wRight) >> 8;
	}
};
143 
144 
// Interpolation policy for sources with alpha channel: all four bytes of
// every pixel are interpolated and t[0]..t[3] are written. All weights are
// expected in the range [0..255].
struct ColorTypeRgba {
	// Weighted sum of the 2x2 pixel block starting at s.
	//	t					receives the four interpolated components
	//	s					left pixel of the top row
	//	sourceBytesPerRow	distance in bytes from s to the pixel below it
	//	wLeft/wRight		horizontal weights
	//	wTop/wBottom		vertical weights
	// The two weight multiplications are undone by the final shift by 16.
	static void
	Interpolate(uint32* t, const uint8* s, uint32 sourceBytesPerRow,
		uint16 wLeft, uint16 wTop, uint16 wRight, uint16 wBottom)
	{
		// left and right of top row
		t[0] = (s[0] * wLeft + s[4] * wRight) * wTop;
		t[1] = (s[1] * wLeft + s[5] * wRight) * wTop;
		t[2] = (s[2] * wLeft + s[6] * wRight) * wTop;
		t[3] = (s[3] * wLeft + s[7] * wRight) * wTop;

		// left and right of bottom row
		s += sourceBytesPerRow;

		t[0] += (s[0] * wLeft + s[4] * wRight) * wBottom;
		t[1] += (s[1] * wLeft + s[5] * wRight) * wBottom;
		t[2] += (s[2] * wLeft + s[6] * wRight) * wBottom;
		t[3] += (s[3] * wLeft + s[7] * wRight) * wBottom;

		t[0] >>= 16;
		t[1] >>= 16;
		t[2] >>= 16;
		t[3] >>= 16;
	}

	// Vertical-only interpolation between pixel s and the pixel sBottom
	// below it; no pixel to the right of s is read.
	static void
	InterpolateLastColumn(uint32* t, const uint8* s, const uint8* sBottom,
		uint16 wTop, uint16 wBottom)
	{
		t[0] = (s[0] * wTop + sBottom[0] * wBottom) >> 8;
		t[1] = (s[1] * wTop + sBottom[1] * wBottom) >> 8;
		t[2] = (s[2] * wTop + sBottom[2] * wBottom) >> 8;
		t[3] = (s[3] * wTop + sBottom[3] * wBottom) >> 8;
	}

	// Horizontal-only interpolation between pixel s and its right
	// neighbor; no pixel below s is read.
	static void
	InterpolateLastRow(uint32* t, const uint8* s, uint16 wLeft,
		uint16 wRight)
	{
		t[0] = (s[0] * wLeft + s[4] * wRight) >> 8;
		t[1] = (s[1] * wLeft + s[5] * wRight) >> 8;
		t[2] = (s[2] * wLeft + s[6] * wRight) >> 8;
		t[3] = (s[3] * wLeft + s[7] * wRight) >> 8;
	}
};
190 
191 
// Blend policy that simply overwrites the destination color.
struct DrawModeCopy {
	// Stores the three color components t[0]..t[2] at d and advances d to
	// the next pixel. The fourth destination byte is left untouched and
	// t[3] is not read.
	static void
	Blend(uint8*& d, uint32* t)
	{
		d[0] = t[0];
		d[1] = t[1];
		d[2] = t[2];
		d += 4;
	}
};
202 
203 
// Blend policy that mixes the source color into the destination according
// to the source alpha in t[3].
struct DrawModeAlphaOverlay {
	// Blends t[0]..t[2] over the pixel at d using t[3] as alpha, then
	// advances d to the next pixel. The fourth destination byte is left
	// untouched.
	static void
	Blend(uint8*& d, uint32* t)
	{
		uint8 t0 = t[0];
		uint8 t1 = t[1];
		uint8 t2 = t[2];
		uint8 t3 = t[3];

		if (t3 == 255) {
			// fully opaque: plain copy, also avoids the rounding loss of
			// the formula below
			d[0] = t0;
			d[1] = t1;
			d[2] = t2;
		} else {
			// d + (t - d) * alpha / 256; the difference may be negative,
			// the complete sum never is
			d[0] = ((t0 - d[0]) * t3 + (d[0] << 8)) >> 8;
			d[1] = ((t1 - d[1]) * t3 + (d[1] << 8)) >> 8;
			d[2] = ((t2 - d[2]) * t3 + (d[2] << 8)) >> 8;
		}

		d += 4;
	}
};
226 
227 
228 template<class ColorType, class DrawMode>
229 struct BilinearDefault :
230 	DrawBitmapBilinearOptimized<BilinearDefault<ColorType, DrawMode> > {
231 
DrawToClipRectBilinearDefault232 	void DrawToClipRect(int32 xIndexL, int32 xIndexR, int32 y1, int32 y2)
233 	{
234 		// In this mode we anticipate many pixels wich need filtering,
235 		// there are no special cases for direct hit pixels except for
236 		// the last column/row and the right/bottom corner pixel.
237 
238 		// The last column/row handling does not need to be performed
239 		// for all clipping rects!
240 		int32 yMax = y2;
241 		if (this->fWeightsY[yMax].weight == 255)
242 			yMax--;
243 		int32 xIndexMax = xIndexR;
244 		if (this->fWeightsX[xIndexMax].weight == 255)
245 			xIndexMax--;
246 
247 		for (; y1 <= yMax; y1++) {
248 			// cache the weight of the top and bottom row
249 			const uint16 wTop = this->fWeightsY[y1].weight;
250 			const uint16 wBottom = 255 - this->fWeightsY[y1].weight;
251 
252 			// buffer offset into source (top row)
253 			const uint8* src = this->fSource->row_ptr(
254 				this->fWeightsY[y1].index);
255 
256 			// buffer handle for destination to be incremented per
257 			// pixel
258 			uint8* d = this->fDestination;
259 
260 			for (int32 x = xIndexL; x <= xIndexMax; x++) {
261 				const uint8* s = src + this->fWeightsX[x].index;
262 
263 				// calculate the weighted sum of all four
264 				// interpolated pixels
265 				const uint16 wLeft = this->fWeightsX[x].weight;
266 				const uint16 wRight = 255 - wLeft;
267 
268 				uint32 t[4];
269 
270 				if (this->fSource->height() > 1) {
271 					ColorType::Interpolate(&t[0], s, this->fSourceBytesPerRow,
272 						wLeft, wTop, wRight, wBottom);
273 				} else {
274 					ColorType::InterpolateLastRow(&t[0], s,  wLeft, wRight);
275 				}
276 				DrawMode::Blend(d, &t[0]);
277 			}
278 			// last column of pixels if necessary
279 			if (xIndexMax < xIndexR && this->fSource->height() > 1) {
280 				const uint8* s = src + this->fWeightsX[xIndexR].index;
281 				const uint8* sBottom = s + this->fSourceBytesPerRow;
282 
283 				uint32 t[4];
284 				ColorType::InterpolateLastColumn(&t[0], s, sBottom, wTop,
285 					wBottom);
286 				DrawMode::Blend(d, &t[0]);
287 			}
288 
289 			this->fDestination += this->fDestinationBytesPerRow;
290 		}
291 
292 		// last row of pixels if necessary
293 		// buffer offset into source (bottom row)
294 		const uint8* src
295 			= this->fSource->row_ptr(this->fWeightsY[y2].index);
296 		// buffer handle for destination to be incremented per pixel
297 		uint8* d = this->fDestination;
298 
299 		if (yMax < y2) {
300 			for (int32 x = xIndexL; x <= xIndexMax; x++) {
301 				const uint8* s = src + this->fWeightsX[x].index;
302 				const uint16 wLeft = this->fWeightsX[x].weight;
303 				const uint16 wRight = 255 - wLeft;
304 				uint32 t[4];
305 				ColorType::InterpolateLastRow(&t[0], s, wLeft, wRight);
306 				DrawMode::Blend(d, &t[0]);
307 			}
308 		}
309 
310 		// pixel in bottom right corner if necessary
311 		if (yMax < y2 && xIndexMax < xIndexR) {
312 			const uint8* s = src + this->fWeightsX[xIndexR].index;
313 			*(uint32*)d = *(uint32*)s;
314 		}
315 	}
316 };
317 
318 
319 struct BilinearLowFilterRatio :
320 	DrawBitmapBilinearOptimized<BilinearLowFilterRatio> {
DrawToClipRectBilinearLowFilterRatio321 	void DrawToClipRect(int32 xIndexL, int32 xIndexR, int32 y1, int32 y2)
322 	{
323 		// In this mode, we anticipate to hit many destination pixels
324 		// that map directly to a source pixel, we have more branches
325 		// in the inner loop but save time because of the special
326 		// cases. If there are too few direct hit pixels, the branches
327 		// only waste time.
328 
329 		for (; y1 <= y2; y1++) {
330 			// cache the weight of the top and bottom row
331 			const uint16 wTop = fWeightsY[y1].weight;
332 			const uint16 wBottom = 255 - fWeightsY[y1].weight;
333 
334 			// buffer offset into source (top row)
335 			const uint8* src = fSource->row_ptr(fWeightsY[y1].index);
336 			// buffer handle for destination to be incremented per
337 			// pixel
338 			uint8* d = fDestination;
339 
340 			if (wTop == 255) {
341 				for (int32 x = xIndexL; x <= xIndexR; x++) {
342 					const uint8* s = src + fWeightsX[x].index;
343 					// This case is important to prevent out
344 					// of bounds access at bottom edge of the source
345 					// bitmap. If the scale is low and integer, it will
346 					// also help the speed.
347 					if (fWeightsX[x].weight == 255) {
348 						// As above, but to prevent out of bounds
349 						// on the right edge.
350 						*(uint32*)d = *(uint32*)s;
351 					} else {
352 						// Only the left and right pixels are
353 						// interpolated, since the top row has 100%
354 						// weight.
355 						const uint16 wLeft = fWeightsX[x].weight;
356 						const uint16 wRight = 255 - wLeft;
357 						d[0] = (s[0] * wLeft + s[4] * wRight) >> 8;
358 						d[1] = (s[1] * wLeft + s[5] * wRight) >> 8;
359 						d[2] = (s[2] * wLeft + s[6] * wRight) >> 8;
360 					}
361 					d += 4;
362 				}
363 			} else {
364 				for (int32 x = xIndexL; x <= xIndexR; x++) {
365 					const uint8* s = src + fWeightsX[x].index;
366 					if (fWeightsX[x].weight == 255) {
367 						// Prevent out of bounds access on the right
368 						// edge or simply speed up.
369 						const uint8* sBottom = s + fSourceBytesPerRow;
370 						d[0] = (s[0] * wTop + sBottom[0] * wBottom)
371 							>> 8;
372 						d[1] = (s[1] * wTop + sBottom[1] * wBottom)
373 							>> 8;
374 						d[2] = (s[2] * wTop + sBottom[2] * wBottom)
375 							>> 8;
376 					} else {
377 						// calculate the weighted sum of all four
378 						// interpolated pixels
379 						const uint16 wLeft = fWeightsX[x].weight;
380 						const uint16 wRight = 255 - wLeft;
381 						// left and right of top row
382 						uint32 t0 = (s[0] * wLeft + s[4] * wRight)
383 							* wTop;
384 						uint32 t1 = (s[1] * wLeft + s[5] * wRight)
385 							* wTop;
386 						uint32 t2 = (s[2] * wLeft + s[6] * wRight)
387 							* wTop;
388 
389 						// left and right of bottom row
390 						s += fSourceBytesPerRow;
391 						t0 += (s[0] * wLeft + s[4] * wRight) * wBottom;
392 						t1 += (s[1] * wLeft + s[5] * wRight) * wBottom;
393 						t2 += (s[2] * wLeft + s[6] * wRight) * wBottom;
394 
395 						d[0] = t0 >> 16;
396 						d[1] = t1 >> 16;
397 						d[2] = t2 >> 16;
398 					}
399 					d += 4;
400 				}
401 			}
402 			fDestination += fDestinationBytesPerRow;
403 		}
404 	}
405 };
406 
407 
#ifdef __i386__

// Bilinear scaling implementation that hands the bulk of every scan line
// to the assembler routine bilinear_scale_xloop_mmxsse(). Only available
// on 32 bit x86. It writes the color components directly, without
// ColorType/DrawMode policies; DrawBitmapBilinear::Draw() only selects it
// for ColorTypeRgb together with DrawModeCopy.
struct BilinearSimd : DrawBitmapBilinearOptimized<BilinearSimd> {
	// Draws one clip rect. All four parameters are inclusive indices into
	// the filter tables; fDestination has to point at the destination pixel
	// that corresponds to (xIndexL, y1) and is advanced row by row.
	void DrawToClipRect(int32 xIndexL, int32 xIndexR, int32 y1, int32 y2)
	{
		// Basically the same as the "standard" mode, but we use SIMD
		// routines for the processing of the single display lines.

		// The last column/row handling does not need to be performed
		// for all clipping rects!
		int32 yMax = y2;
		if (fWeightsY[yMax].weight == 255)
			yMax--;
		int32 xIndexMax = xIndexR;
		if (fWeightsX[xIndexMax].weight == 255)
			xIndexMax--;

		for (; y1 <= yMax; y1++) {
			// cache the weight of the top and bottom row
			const uint16 wTop = fWeightsY[y1].weight;
			const uint16 wBottom = 255 - fWeightsY[y1].weight;

			// buffer offset into source (top row)
			const uint8* src = fSource->row_ptr(fWeightsY[y1].index);
			// buffer handle for destination to be incremented per
			// pixel
			uint8* d = fDestination;
			bilinear_scale_xloop_mmxsse(src, fDestination, fWeightsX, xIndexL,
				xIndexMax, wTop, fSourceBytesPerRow);
			// increase pointer by processed pixels
			d += (xIndexMax - xIndexL + 1) * 4;

			// last column of pixels if necessary
			if (xIndexMax < xIndexR) {
				// only the top and bottom pixels are interpolated
				const uint8* s = src + fWeightsX[xIndexR].index;
				const uint8* sBottom = s + fSourceBytesPerRow;
				d[0] = (s[0] * wTop + sBottom[0] * wBottom) >> 8;
				d[1] = (s[1] * wTop + sBottom[1] * wBottom) >> 8;
				d[2] = (s[2] * wTop + sBottom[2] * wBottom) >> 8;
			}

			fDestination += fDestinationBytesPerRow;
		}

		// last row of pixels if necessary
		// buffer offset into source (bottom row)
		const uint8* src = fSource->row_ptr(fWeightsY[y2].index);
		// buffer handle for destination to be incremented per pixel
		uint8* d = fDestination;

		if (yMax < y2) {
			// only the left and right pixels are interpolated
			for (int32 x = xIndexL; x <= xIndexMax; x++) {
				const uint8* s = src + fWeightsX[x].index;
				const uint16 wLeft = fWeightsX[x].weight;
				const uint16 wRight = 255 - wLeft;
				d[0] = (s[0] * wLeft + s[4] * wRight) >> 8;
				d[1] = (s[1] * wLeft + s[5] * wRight) >> 8;
				d[2] = (s[2] * wLeft + s[6] * wRight) >> 8;
				d += 4;
			}
		}

		// pixel in bottom right corner if necessary
		if (yMax < y2 && xIndexMax < xIndexR) {
			// direct hit in both directions: copy the whole 32 bit pixel
			const uint8* s = src + fWeightsX[xIndexR].index;
			*(uint32*)d = *(uint32*)s;
		}
	}
};

#endif	// __i386__
479 
480 
481 template<class ColorType, class DrawMode>
482 struct DrawBitmapBilinear {
483 	void
DrawDrawBitmapBilinear484 	Draw(const Painter* painter, PainterAggInterface& aggInterface,
485 		agg::rendering_buffer& bitmap, BPoint offset,
486 		double scaleX, double scaleY, BRect destinationRect)
487 	{
488 		//bigtime_t now = system_time();
489 		uint32 dstWidth = destinationRect.IntegerWidth() + 1;
490 		uint32 dstHeight = destinationRect.IntegerHeight() + 1;
491 		uint32 srcWidth = bitmap.width();
492 		uint32 srcHeight = bitmap.height();
493 
494 		// Do not calculate more filter weights than necessary and also
495 		// keep the stack based allocations reasonably sized
496 		const BRegion& clippingRegion = *painter->ClippingRegion();
497 		if (clippingRegion.Frame().IntegerWidth() + 1 < (int32)dstWidth)
498 			dstWidth = clippingRegion.Frame().IntegerWidth() + 1;
499 		if (clippingRegion.Frame().IntegerHeight() + 1 < (int32)dstHeight)
500 			dstHeight = clippingRegion.Frame().IntegerHeight() + 1;
501 
502 		// When calculating less filter weights than specified by
503 		// destinationRect, we need to compensate the offset.
504 		FilterData filterData;
505 		filterData.fIndexOffsetX = 0;
506 		filterData.fIndexOffsetY = 0;
507 		if (clippingRegion.Frame().left > destinationRect.left) {
508 			filterData.fIndexOffsetX = (int32)(clippingRegion.Frame().left
509 				- destinationRect.left);
510 		}
511 		if (clippingRegion.Frame().top > destinationRect.top) {
512 			filterData.fIndexOffsetY = (int32)(clippingRegion.Frame().top
513 				- destinationRect.top);
514 		}
515 
516 //#define FILTER_INFOS_ON_HEAP
517 #ifdef FILTER_INFOS_ON_HEAP
518 		filterData.fWeightsX = new (nothrow) FilterInfo[dstWidth];
519 		filterData.fWeightsY = new (nothrow) FilterInfo[dstHeight];
520 		if (filterData.fWeightsX == NULL || filterData.fWeightsY == NULL) {
521 			delete[] filterData.fWeightsX;
522 			delete[] filterData.fWeightsY;
523 			return;
524 		}
525 #else
526 		// stack based saves about 200µs on 1.85 GHz Core 2 Duo
527 		// should not pose a problem with stack overflows
528 		// (needs around 12Kb for 1920x1200)
529 		FilterInfo xWeights[dstWidth];
530 		FilterInfo yWeights[dstHeight];
531 		filterData.fWeightsX = &xWeights[0];
532 		filterData.fWeightsY = &yWeights[0];
533 #endif
534 
535 		// Extract the cropping information for the source bitmap,
536 		// If only a part of the source bitmap is to be drawn with scale,
537 		// the offset will be different from the destinationRect left top
538 		// corner.
539 		const int32 xBitmapShift = (int32)(destinationRect.left - offset.x);
540 		const int32 yBitmapShift = (int32)(destinationRect.top - offset.y);
541 
542 		for (uint32 i = 0; i < dstWidth; i++) {
543 			// fractional index into source
544 			// NOTE: It is very important to calculate the fractional index
545 			// into the source pixel grid like this to prevent out of bounds
546 			// access! It will result in the rightmost pixel of the destination
547 			// to access the rightmost pixel of the source with a weighting
548 			// of 255. This in turn will trigger an optimization in the loop
549 			// that also prevents out of bounds access.
550 			float index = (i + filterData.fIndexOffsetX) * (srcWidth - 1)
551 				/ (srcWidth * scaleX - 1);
552 			// round down to get the left pixel
553 			filterData.fWeightsX[i].index = (uint16)index;
554 			filterData.fWeightsX[i].weight =
555 				255 - (uint16)((index - filterData.fWeightsX[i].index) * 255);
556 			// handle cropped source bitmap
557 			filterData.fWeightsX[i].index += xBitmapShift;
558 			// precompute index for 32 bit pixels
559 			filterData.fWeightsX[i].index *= 4;
560 		}
561 
562 		for (uint32 i = 0; i < dstHeight; i++) {
563 			// fractional index into source
564 			// NOTE: It is very important to calculate the fractional index
565 			// into the source pixel grid like this to prevent out of bounds
566 			// access! It will result in the bottommost pixel of the
567 			// destination to access the bottommost pixel of the source with a
568 			// weighting of 255. This in turn will trigger an optimization in
569 			// the loop that also prevents out of bounds access.
570 			float index = (i + filterData.fIndexOffsetY) * (srcHeight - 1)
571 				/ (srcHeight * scaleY - 1);
572 			// round down to get the top pixel
573 			filterData.fWeightsY[i].index = (uint16)index;
574 			filterData.fWeightsY[i].weight =
575 				255 - (uint16)((index - filterData.fWeightsY[i].index) * 255);
576 			// handle cropped source bitmap
577 			filterData.fWeightsY[i].index += yBitmapShift;
578 		}
579 		//printf("X: %d/%d ... %d/%d, %d/%d (%ld)\n",
580 		//	xWeights[0].index, xWeights[0].weight,
581 		//	xWeights[dstWidth - 2].index, xWeights[dstWidth - 2].weight,
582 		//	xWeights[dstWidth - 1].index, xWeights[dstWidth - 1].weight,
583 		//	dstWidth);
584 		//printf("Y: %d/%d ... %d/%d, %d/%d (%ld)\n",
585 		//	yWeights[0].index, yWeights[0].weight,
586 		//	yWeights[dstHeight - 2].index, yWeights[dstHeight - 2].weight,
587 		//	yWeights[dstHeight - 1].index, yWeights[dstHeight - 1].weight,
588 		//	dstHeight);
589 
590 		// Figure out which version of the code we want to use...
591 		enum {
592 			kOptimizeForLowFilterRatio = 0,
593 			kUseDefaultVersion,
594 			kUseSIMDVersion
595 		};
596 
597 		int codeSelect = kUseDefaultVersion;
598 
599 		if (typeid(ColorType) == typeid(ColorTypeRgb)
600 			&& typeid(DrawMode) == typeid(DrawModeCopy)) {
601 			uint32 neededSIMDFlags = APPSERVER_SIMD_MMX | APPSERVER_SIMD_SSE;
602 			if ((gSIMDFlags & neededSIMDFlags) == neededSIMDFlags)
603 				codeSelect = kUseSIMDVersion;
604 			else {
605 				if (scaleX == scaleY && (scaleX == 1.5 || scaleX == 2.0
606 					|| scaleX == 2.5 || scaleX == 3.0)) {
607 					codeSelect = kOptimizeForLowFilterRatio;
608 				}
609 			}
610 		}
611 
612 		switch (codeSelect) {
613 			case kUseDefaultVersion:
614 			{
615 				BilinearDefault<ColorType, DrawMode> bilinearPainter;
616 				bilinearPainter.Draw(aggInterface, destinationRect, &bitmap,
617 					filterData);
618 				break;
619 			}
620 
621 			case kOptimizeForLowFilterRatio:
622 			{
623 				BilinearLowFilterRatio bilinearPainter;
624 				bilinearPainter.Draw(aggInterface, destinationRect,
625 					&bitmap, filterData);
626 				break;
627 			}
628 
629 #ifdef __i386__
630 			case kUseSIMDVersion:
631 			{
632 				BilinearSimd bilinearPainter;
633 				bilinearPainter.Draw(aggInterface, destinationRect, &bitmap,
634 					filterData);
635 				break;
636 			}
637 #endif	// __i386__
638 		}
639 
640 #ifdef FILTER_INFOS_ON_HEAP
641 		delete[] filterData.fWeightsX;
642 		delete[] filterData.fWeightsY;
643 #endif
644 		//printf("draw bitmap %.5fx%.5f: %lld\n", scaleX, scaleY,
645 		//	system_time() - now);
646 	}
647 };
648 
649 
650 } // namespace BitmapPainterPrivate
651 
652 
653 #endif // DRAW_BITMAP_BILINEAR_H
654