Skip to main content

By clicking Submit, you agree to the developerWorks terms of use.

The first time you sign into developerWorks, a profile is created for you. Select information in your developerWorks profile is displayed to the public, but you may edit the information at any time. Your first name, last name (unless you choose to hide them), and display name will accompany the content that you post.

All information submitted is secure.

  • Close [x]

The first time you sign in to developerWorks, a profile is created for you, so you need to choose a display name. Your display name accompanies the content you post on developerWorks.

Please choose a display name of 3 to 31 characters. Your display name must be unique in the developerWorks community and, for privacy reasons, should not be your email address.

By clicking Submit, you agree to the developerWorks terms of use.

All information submitted is secure.

  • Close [x]

Unrolling AltiVec, Part 3: Down and dirty loop optimization

Learn how to tailor your code for AltiVec

Return to article


Listing 1. The original pre-vectorization code (colors.c)


#include <sys/time.h>
#include <math.h>
#include <stdio.h>
/*
 * Pairwise minimum / maximum. These are plain macros, so an argument may
 * be evaluated more than once; do not pass expressions with side effects.
 */
#define MIN2(a, b) ((a) < (b) ? (a) : (b))
#define MAX2(a, b) ((a) > (b) ? (a) : (b))

/* Three-way minimum / maximum, built by nesting the pairwise forms. */
#define MIN(a, b, c) (MIN2(MIN2((a), (b)), (c)))
#define MAX(a, b, c) (MAX2(MAX2((a), (b)), (c)))

/* Not referenced by any code visible in this listing. */
static unsigned int x;

/*
 * Rotate the hue of every pixel by half a colour-wheel sector and write
 * the result back in place.
 *
 * size             - number of pixels to process; a value <= 0 touches nothing
 * red, green, blue - parallel channel arrays holding at least `size` bytes
 *                    each; every element is read and then overwritten
 *
 * Each pixel is converted to a hue in [0, 252) (six sectors of 42 steps),
 * the hue is advanced by 21 steps (30 degrees) modulo 252, and the pixel
 * is rebuilt from the new hue using the original maximum channel as the
 * value and the original saturation.
 *
 * Pixels whose three channels are equal (grays, including black) have no
 * defined hue and zero saturation, so rotating them is a no-op; they are
 * skipped, which also keeps both divisions below away from a zero divisor.
 */
void
inc_hsv(int size, unsigned char *red, unsigned char *green, unsigned char *blue) {
	int i;

	for (i = 0; i < size; ++i) {
		int r = red[i], g = green[i], b = blue[i];

		int max = r > g ? r : g;
		if (b > max)
			max = b;
		int min = r < g ? r : g;
		if (b < min)
			min = b;

		int delta = max - min;
		if (delta == 0)
			continue;	// gray or black: no hue to rotate

		// delta > 0 implies max > 0, so this division is safe
		int s = (255 * delta) / max;

		// Hue is computed as a signed int: converting a negative float
		// straight to unsigned char is undefined behaviour.
		int hue;
		if (r == max) {
			hue = (int) (42 * ((g - b) / (float) delta));
			if (hue < 0)
				hue += 252;	// wrap the magenta side back onto the wheel
		} else if (g == max) {
			hue = (int) (84 + 42 * ((b - r) / (float) delta));
		} else {
			hue = (int) (168 + 42 * ((r - g) / (float) delta));
		}

		// add half a sector, staying on the 252-step wheel
		hue = (hue + 21) % 252;

		/* and now somewhere from 0 to 6 */
		float fh = (hue * 6) / 252.0;
		int sector = hue / 42;		// integer part of fh
		float f = fh - sector;		// position inside the sector, [0, 1)

		int p = max * (255 - s) / 255;
		int q = max * (255 - s * f) / 255;
		int t = max * (255 - s * (1 - f)) / 255;

		switch (sector) {
			case 0:
				red[i] = max;
				green[i] = t;
				blue[i] = p;
				break;
			case 1:
				red[i] = q;
				green[i] = max;
				blue[i] = p;
				break;
			case 2:
				red[i] = p;
				green[i] = max;
				blue[i] = t;
				break;
			case 3:
				red[i] = p;
				green[i] = q;
				blue[i] = max;
				break;
			case 4:
				red[i] = t;
				green[i] = p;
				blue[i] = max;
				break;
			default:		// case 5:
				red[i] = max;
				green[i] = p;
				blue[i] = q;
				break;
		}
	}

}

/*
 * Seed palette of 32 fully saturated colours, listed red -> yellow ->
 * green -> cyan -> blue -> magenta and back towards red. main() repeats
 * it to fill the channel arrays.
 */
struct { int r, g, b; } colors[] = {
	/* red towards yellow */
	{ .r = 0xff, .g = 0x00, .b = 0x00 },
	{ .r = 0xff, .g = 0x2f, .b = 0x00 },
	{ .r = 0xff, .g = 0x5f, .b = 0x00 },
	{ .r = 0xff, .g = 0x8f, .b = 0x00 },
	{ .r = 0xff, .g = 0xbf, .b = 0x00 },
	{ .r = 0xff, .g = 0xef, .b = 0x00 },
	/* yellow towards green */
	{ .r = 0xdf, .g = 0xff, .b = 0x00 },
	{ .r = 0xaf, .g = 0xff, .b = 0x00 },
	{ .r = 0x7f, .g = 0xff, .b = 0x00 },
	{ .r = 0x4f, .g = 0xff, .b = 0x00 },
	{ .r = 0x1f, .g = 0xff, .b = 0x00 },
	/* green towards cyan */
	{ .r = 0x00, .g = 0xff, .b = 0x0f },
	{ .r = 0x00, .g = 0xff, .b = 0x3f },
	{ .r = 0x00, .g = 0xff, .b = 0x6f },
	{ .r = 0x00, .g = 0xff, .b = 0x9f },
	{ .r = 0x00, .g = 0xff, .b = 0xcf },
	{ .r = 0x00, .g = 0xff, .b = 0xff },
	/* cyan towards blue */
	{ .r = 0x00, .g = 0xcf, .b = 0xff },
	{ .r = 0x00, .g = 0x9f, .b = 0xff },
	{ .r = 0x00, .g = 0x6f, .b = 0xff },
	{ .r = 0x00, .g = 0x3f, .b = 0xff },
	{ .r = 0x00, .g = 0x0f, .b = 0xff },
	/* blue towards magenta */
	{ .r = 0x1f, .g = 0x00, .b = 0xff },
	{ .r = 0x4f, .g = 0x00, .b = 0xff },
	{ .r = 0x7f, .g = 0x00, .b = 0xff },
	{ .r = 0xaf, .g = 0x00, .b = 0xff },
	{ .r = 0xdf, .g = 0x00, .b = 0xff },
	/* magenta back towards red */
	{ .r = 0xff, .g = 0x00, .b = 0xef },
	{ .r = 0xff, .g = 0x00, .b = 0xbf },
	{ .r = 0xff, .g = 0x00, .b = 0x8f },
	{ .r = 0xff, .g = 0x00, .b = 0x5f },
	{ .r = 0xff, .g = 0x00, .b = 0x2f },
};

/* Per-channel pixel buffers, 65536 pixels each; filled and timed by main(). */
unsigned char red[65536];
unsigned char green[65536];
unsigned char blue[65536];

/*
 * Benchmark driver.
 *
 * Fills the red/green/blue arrays by cycling through the `colors` palette,
 * prints every other one of the first 32 pixels, runs inc_hsv() once over
 * all 65536 pixels while timing it with gettimeofday(), then prints the
 * same pixels again followed by the elapsed time in microseconds.
 *
 * Returns 0.
 */
int
main(void) {
	struct timeval before, after;
	long long microsec;
	int i, j = 0;

	// set up arrays from our little rainbow
	for (i = 0; i < 65536; ++i) {
		red[i] = colors[j].r;
		green[i] = colors[j].g;
		blue[i] = colors[j].b;
		++j;
		if (j == (int) (sizeof(colors) / sizeof(*colors))) {
			j = 0;
		}
	}
	for (j = 0; j < 32; j += 2)
		printf("(%d) { %02x %02x %02x }\n", j, red[j], green[j], blue[j]);

	// The timezone argument is obsolete; POSIX leaves the behaviour
	// unspecified when it is non-NULL, so pass NULL.
	gettimeofday(&before, NULL);
	for (i = 0; i < 1; ++i) {
		inc_hsv(65536, red, green, blue);
	}
	gettimeofday(&after, NULL);

	// Widen before multiplying so the seconds term cannot overflow a
	// narrower time_t/long.
	microsec = (long long) (after.tv_usec - before.tv_usec) +
		1000000LL * (long long) (after.tv_sec - before.tv_sec);
	for (j = 0; j < 32; j += 2)
		printf("(%d) { %02x %02x %02x }\n", j, red[j], green[j], blue[j]);
	printf("%lld microseconds\n", microsec);
	return 0;
}









Return to article