Skip to main content

skip to main content

developerWorks  >  Power Architecture technology  >

Unrolling AltiVec, Part 3: Down and dirty loop optimization

Learn how to tailor your code for AltiVec

developerWorks

Return to article


Listing 1. The original pre-vectorization code (colors.c)


#include <sys/time.h>
#include <math.h>
#include <stdio.h>
/*
 * Two-way selectors.  These are plain textual macros, so an argument with
 * side effects may be evaluated more than once.
 */
#define MIN2(a, b) ((a) < (b) ? (a) : (b))
#define MAX2(a, b) ((a) > (b) ? (a) : (b))

/* Three-way selectors: fold the first two arguments, then the third. */
#define MIN(a, b, c) (MIN2(MIN2((a), (b)), (c)))
#define MAX(a, b, c) (MAX2(MAX2((a), (b)), (c)))

/* File-scope variable with internal linkage; no code in this listing references it. */
static unsigned int x;

/*
 * Rotate the hue of every pixel by half a colour-wheel sector, in place.
 *
 * Each (red[i], green[i], blue[i]) triple is converted to a hue/saturation/
 * value form, the hue is advanced by 21 steps on a 252-step wheel (six
 * sectors of 42 steps each), and the result is converted back to RGB and
 * written over the input.
 *
 * size              number of pixels to process; nothing happens if <= 0
 * red, green, blue  parallel channel arrays holding at least `size` bytes
 *
 * Pixels whose three channels are equal (greys, including black) have zero
 * saturation, so rotating their hue cannot change them.  They are skipped,
 * which also keeps the saturation and hue divisions away from zero.
 */
void
inc_hsv(int size, unsigned char *red, unsigned char *green, unsigned char *blue) {
	enum {
		SECTOR_STEPS = 42,			/* hue steps per sector */
		WHEEL_STEPS = 6 * SECTOR_STEPS,		/* full hue circle */
		HUE_SHIFT = SECTOR_STEPS / 2		/* rotation applied */
	};
	int i;

	for (i = 0; i < size; ++i) {
		int r = red[i], g = green[i], b = blue[i];
		int max = r, min = r;

		if (g > max) {
			max = g;
		} else if (g < min) {
			min = g;
		}
		if (b > max) {
			max = b;
		} else if (b < min) {
			min = b;
		}

		int delta = max - min;
		if (delta == 0) {
			// grey or black: no hue to rotate, pixel stays as it is
			continue;
		}

		// max > 0 here because delta > 0
		int s = (255 * delta) / max;

		// Hue is kept in a signed int so that the negative values the
		// red-dominant case can produce are wrapped explicitly rather than
		// through an out-of-range float-to-unsigned-char conversion.
		int hue;
		if (r == max) {
			hue = (int) (SECTOR_STEPS * ((g - b) / (float) delta));
			if (hue < 0) {
				hue += WHEEL_STEPS;
			}
		} else if (g == max) {
			hue = (int) (2 * SECTOR_STEPS + SECTOR_STEPS * ((b - r) / (float) delta));
		} else {
			hue = (int) (4 * SECTOR_STEPS + SECTOR_STEPS * ((r - g) / (float) delta));
		}

		// add half a sector, wrapping around the wheel
		hue = (hue + HUE_SHIFT) % WHEEL_STEPS;

		/* and now somewhere from 0 to 6 */
		float fh = (float) ((hue * 6) / (double) WHEEL_STEPS);
		int sector = (int) fh;	// fh >= 0, so truncation equals floor
		float f = fh - sector;

		int p = max * (255 - s) / 255;
		int q = (int) (max * (255 - s * f) / 255);
		int t = (int) (max * (255 - s * (1 - f)) / 255);

		switch (sector) {
			case 0:
				red[i] = max;
				green[i] = t;
				blue[i] = p;
				break;
			case 1:
				red[i] = q;
				green[i] = max;
				blue[i] = p;
				break;
			case 2:
				red[i] = p;
				green[i] = max;
				blue[i] = t;
				break;
			case 3:
				red[i] = p;
				green[i] = q;
				blue[i] = max;
				break;
			case 4:
				red[i] = t;
				green[i] = p;
				blue[i] = max;
				break;
			default:		// case 5:
				red[i] = max;
				green[i] = p;
				blue[i] = q;
				break;
		}
	}

}

/*
 * Seed palette: 32 fully saturated colours stepping once around the colour
 * wheel (red -> yellow -> green -> cyan -> blue -> magenta -> back to red).
 */
struct { int r, g, b; } colors[] = {
	/* red towards yellow */
	{ .r = 0xff, .g = 0x00, .b = 0x00 },
	{ .r = 0xff, .g = 0x2f, .b = 0x00 },
	{ .r = 0xff, .g = 0x5f, .b = 0x00 },
	{ .r = 0xff, .g = 0x8f, .b = 0x00 },
	{ .r = 0xff, .g = 0xbf, .b = 0x00 },
	{ .r = 0xff, .g = 0xef, .b = 0x00 },
	/* yellow towards green */
	{ .r = 0xdf, .g = 0xff, .b = 0x00 },
	{ .r = 0xaf, .g = 0xff, .b = 0x00 },
	{ .r = 0x7f, .g = 0xff, .b = 0x00 },
	{ .r = 0x4f, .g = 0xff, .b = 0x00 },
	{ .r = 0x1f, .g = 0xff, .b = 0x00 },
	/* green towards cyan */
	{ .r = 0x00, .g = 0xff, .b = 0x0f },
	{ .r = 0x00, .g = 0xff, .b = 0x3f },
	{ .r = 0x00, .g = 0xff, .b = 0x6f },
	{ .r = 0x00, .g = 0xff, .b = 0x9f },
	{ .r = 0x00, .g = 0xff, .b = 0xcf },
	{ .r = 0x00, .g = 0xff, .b = 0xff },
	/* cyan towards blue */
	{ .r = 0x00, .g = 0xcf, .b = 0xff },
	{ .r = 0x00, .g = 0x9f, .b = 0xff },
	{ .r = 0x00, .g = 0x6f, .b = 0xff },
	{ .r = 0x00, .g = 0x3f, .b = 0xff },
	{ .r = 0x00, .g = 0x0f, .b = 0xff },
	/* blue towards magenta */
	{ .r = 0x1f, .g = 0x00, .b = 0xff },
	{ .r = 0x4f, .g = 0x00, .b = 0xff },
	{ .r = 0x7f, .g = 0x00, .b = 0xff },
	{ .r = 0xaf, .g = 0x00, .b = 0xff },
	{ .r = 0xdf, .g = 0x00, .b = 0xff },
	/* magenta back towards red */
	{ .r = 0xff, .g = 0x00, .b = 0xef },
	{ .r = 0xff, .g = 0x00, .b = 0xbf },
	{ .r = 0xff, .g = 0x00, .b = 0x8f },
	{ .r = 0xff, .g = 0x00, .b = 0x5f },
	{ .r = 0xff, .g = 0x00, .b = 0x2f },
};

/* Parallel per-channel pixel buffers, one byte per pixel, 65536 pixels each. */
unsigned char red[65536];
unsigned char green[65536];
unsigned char blue[65536];

int
main(void) {
	struct timezone dontcare = { 0, 0 };
	struct timeval before, after;
	long long microsec;
	int i, j = 0;

	// set up arrays from our little rainbow
	for (i = 0; i < 65536; ++i) {
		red[i] = colors[j].r;
		green[i] = colors[j].g;
		blue[i] = colors[j].b;
		++j;
		if (j == (sizeof(colors) / sizeof(*colors))) {
			j = 0;
		}
	}
	for (j = 0; j < 32; j += 2)
		printf("(%d) { %02x %02x %02x }\n", j, red[j], green[j], blue[j]);

	gettimeofday(&before, &dontcare);
	for (i = 0; i < 1; ++i) {
		inc_hsv(65536, red, green, blue);
	}
	gettimeofday(&after, &dontcare);
	microsec = (after.tv_usec - before.tv_usec) +
		1000000 * (after.tv_sec - before.tv_sec);
	for (j = 0; j < 32; j += 2)
		printf("(%d) { %02x %02x %02x }\n", j, red[j], green[j], blue[j]);
	printf("%lld microseconds\n", microsec);
	return 0;
}









Return to article