#include <sys/time.h>
#include <math.h>
#include <stdio.h>
void
inc_hsv(int size, unsigned char *red, unsigned char *green, unsigned char *blue) {
int i;
unsigned char *r, *g, *b;
vector float one_255th = { 1.0 / 255, 1.0 / 255, 1.0 / 255, 1.0 / 255 };
vector float fh_factor = { 6.0 / 252.0, 6.0 / 252.0,
6.0 / 252.0, 6.0 / 252.0 };
vector float vec_zero = { 0.0 };
vector float vec_half = { 0.5, 0.5, 0.5, 0.5 };
vector float vec_unity = { 1.0, 1.0, 1.0, 1.0 };
vector unsigned char u8_1 = vec_splat_u8(1);
vector unsigned short u16_1 = vec_splat_u16(1);
// multiplying converts:
// 0123456789abcdef
// to:
// 000044448888cccc
// 22226666aaaaeeee
// 111155559999dddd
// 33337777bbbbffff
unsigned char inorder_values[16] = {
0x00, 0x08, 0x04, 0x0c,
0x01, 0x09, 0x05, 0x0d,
0x02, 0x0a, 0x06, 0x0e,
0x03, 0x0b, 0x07, 0x0f
};
vector unsigned char inorder = vec_ld(0, inorder_values);
unsigned char split_values[16] = {
0x00, 0x08, 0x01, 0x09,
0x02, 0x0a, 0x03, 0x0b,
0x04, 0x0c, 0x05, 0x0d,
0x06, 0x0e, 0x07, 0x0f
};
vector unsigned char split = vec_ld(0, split_values);
vector unsigned char u8_42 = vec_add(vec_splat_u8(10), vec_splat_u8(11));
// start it at 21, double to get 42.
u8_42 = vec_add(u8_42, u8_42);
for (i = 0; i < size; i += 16) {
int j;
register vector unsigned char r16, g16, b16;
register vector unsigned char max16, min16, delta16;
vector bool char r_max, g_max, b_max;
vector unsigned char hue_r16, hue_g16, hue_b16;
vector unsigned char hue16 = (vector unsigned char) {0};
vector unsigned char sat16;
unsigned char delta[16];
unsigned char hue_r[16];
unsigned char hue_g[16];
unsigned char hue_b[16];
vector unsigned char scratch8;
vector unsigned short scratch[4];
vector float fh[4];
vector float fv[4];
vector float fs[4];
vector unsigned char v84;
unsigned char values[16];
unsigned char sats[16];
signed short g_b[16], b_r[16], r_g[16];
r = red + i;
g = green + i;
b = blue + i;
r16 = vec_ld(0, r);
g16 = vec_ld(0, g);
b16 = vec_ld(0, b);
max16 = vec_max(vec_max(r16, g16), b16);
min16 = vec_min(vec_min(r16, g16), b16);
delta16 = vec_sub(max16, min16);
vec_st(delta16, 0, delta);
vec_st(max16, 0, values);
for (j = 0; j < 16; ++j) {
sats[j] = (255 * delta[j] / values[j]);
}
sat16 = vec_ld(0, sats);
// these indicate which values to take from the
// result registers
r_max = vec_cmpeq(max16, r16);
g_max = vec_cmpeq(max16, g16);
b_max = vec_cmpeq(max16, b16);
// precalculate x-y*42 values
scratch8 = vec_perm(g16, g16, split);
scratch[0] = vec_mule(scratch8, u8_42);
scratch[1] = vec_mulo(scratch8, u8_42);
scratch8 = vec_perm(b16, b16, split);
scratch[2] = vec_mule(scratch8, u8_42);
scratch[3] = vec_mulo(scratch8, u8_42);
vec_st(vec_sub((vector signed short) scratch[0], (vector
signed short) scratch[2]), 0, g_b);
vec_st(vec_sub((vector signed short) scratch[1], (vector
signed short) scratch[3]), 16, g_b);
scratch8 = vec_perm(b16, b16, split);
scratch[0] = vec_mule(scratch8, u8_42);
scratch[1] = vec_mulo(scratch8, u8_42);
scratch8 = vec_perm(r16, r16, split);
scratch[2] = vec_mule(scratch8, u8_42);
scratch[3] = vec_mulo(scratch8, u8_42);
vec_st(vec_sub((vector signed short) scratch[0], (vector
signed short) scratch[2]), 0, b_r);
vec_st(vec_sub((vector signed short) scratch[1], (vector
signed short) scratch[3]), 16, b_r);
scratch8 = vec_perm(r16, r16, split);
scratch[0] = vec_mule(scratch8, u8_42);
scratch[1] = vec_mulo(scratch8, u8_42);
scratch8 = vec_perm(g16, g16, split);
scratch[2] = vec_mule(scratch8, u8_42);
scratch[3] = vec_mulo(scratch8, u8_42);
vec_st(vec_sub((vector signed short) scratch[0], (vector
signed short) scratch[2]), 0, r_g);
vec_st(vec_sub((vector signed short) scratch[1], (vector
signed short) scratch[3]), 16, r_g);
// Hue ranges from 0 to 251, because 252 is divisible by 6,
// and this eliminates a subtle drifting effect.
for (j = 0; j < 16; ++j) {
if (delta[j] == 0) {
hue_r[j] = hue_g[j] = hue_b[j] = 0;
continue;
}
if (r[j] == values[j]) {
hue_r[j] = g_b[j] / (float) delta[j];
if (hue_r[j] > 127)
hue_r[j] -= 4;
}
if (g[j] == values[j]) {
hue_g[j] = b_r[j] / (float) delta[j];
}
if (b[j] == values[j]) {
hue_b[j] = r_g[j] / (float) delta[j];
}
}
v84 = vec_add(u8_42, u8_42); // 42+42 = 84
hue_r16 = vec_ld(0, hue_r);
hue_g16 = vec_add(v84, vec_ld(0, hue_g));
v84 = vec_add(v84, v84); // == 168
hue_b16 = vec_add(v84, vec_ld(0, hue_b));
hue16 = vec_sel(hue16, hue_r16, r_max);
hue16 = vec_sel(hue16, hue_g16, g_max);
hue16 = vec_sel(hue16, hue_b16, b_max);
// we now have hue, sat, and value registers of 16 values at
// once... unfortunately, we need to multiply by a float
// value, so some splitting up needs to happen
hue16 = vec_perm(hue16, hue16, inorder);
scratch[0] = vec_mule(hue16, u8_1);
scratch[1] = vec_mulo(hue16, u8_1);
max16 = vec_perm(max16, max16, inorder);
fh[0] = vec_madd(vec_ctf(vec_mule(scratch[0], u16_1), 0),
fh_factor, vec_half);
fh[1] = vec_madd(vec_ctf(vec_mulo(scratch[0], u16_1), 0),
fh_factor, vec_half);
scratch[0] = vec_mule(max16, u8_1);
fh[2] = vec_madd(vec_ctf(vec_mule(scratch[1], u16_1), 0),
fh_factor, vec_half);
fh[3] = vec_madd(vec_ctf(vec_mulo(scratch[1], u16_1), 0),
fh_factor, vec_half);
scratch[1] = vec_mulo(max16, u8_1);
sat16 = vec_perm(sat16, sat16, inorder);
fv[0] = vec_ctf(vec_mule(scratch[0], u16_1), 0);
fv[1] = vec_ctf(vec_mulo(scratch[0], u16_1), 0);
scratch[0] = vec_mule(sat16, u8_1);
fv[2] = vec_ctf(vec_mule(scratch[1], u16_1), 0);
fv[3] = vec_ctf(vec_mulo(scratch[1], u16_1), 0);
scratch[1] = vec_mulo(sat16, u8_1);
fs[0] = vec_ctf(vec_mule(scratch[0], u16_1), 0);
fs[1] = vec_ctf(vec_mulo(scratch[0], u16_1), 0);
fs[2] = vec_ctf(vec_mule(scratch[1], u16_1), 0);
fs[3] = vec_ctf(vec_mulo(scratch[1], u16_1), 0);
for (j = 0; j < 4; ++j) {
vector unsigned int sector;
vector float remainder;
vector float floor;
vector float six;
vector float sub6;
vector bool int dosub;
vector float p, q, t;
vector float vec255 = { 255.0, 255.0, 255.0, 255.0 };
float ps[4], qs[4], ts[4];
unsigned int sectors[4];
int k;
six = (vector float) { 6.0, 6.0, 6.0, 6.0 };
dosub = vec_cmpgt(fh[j], six);
sub6 = vec_sub(fh[j], six);
fh[j] = vec_sel(fh[j], sub6, dosub);
floor = vec_floor(fh[j]);
sector = vec_ctu(floor, 0);
vec_st(sector, 0, sectors);
remainder = vec_sub(fh[j], floor);
{
/* p = (255 - s) * (max / 255);
* q = (255 - s * f) * (max / 255);
* t = (255 - s * (1 - f)) * (max / 255);
*/
vector float max_255;
max_255 = vec_madd(fv[j], one_255th, vec_zero);
p = vec_sub(vec255, fs[j]);
q = vec_sub(vec255, vec_madd(fs[j], remainder, vec_zero));
t = vec_sub(vec255,
vec_madd(fs[j],
vec_sub(vec_unity, remainder),
vec_zero));
p = vec_madd(p, max_255, vec_zero);
q = vec_madd(q, max_255, vec_zero);
t = vec_madd(t, max_255, vec_zero);
}
vec_st(p, 0, ps);
vec_st(q, 0, qs);
vec_st(t, 0, ts);
for (k = 0; k < 4; ++k) {
int index = j * 4 + k;
switch (sectors[k]) {
case 0:
r[index] = values[index];
g[index] = ts[k];
b[index] = ps[k];
break;
case 1:
r[index] = qs[k];
g[index] = values[index];
b[index] = ps[k];
break;
case 2:
r[index] = ps[k];
g[index] = values[index];
b[index] = ts[k];
break;
case 3:
r[index] = ps[k];
g[index] = qs[k];
b[index] = values[index];
break;
case 4:
r[index] = ts[k];
g[index] = ps[k];
b[index] = values[index];
break;
default: // case 5:
r[index] = values[index];
g[index] = ps[k];
b[index] = qs[k];
break;
}
}
}
}
}
/*
 * 32-entry rainbow used to seed the pixel buffers.  One channel at a time
 * ramps in steps of 0x30 while the other two sit at 0x00 / 0xff.
 */
struct { int r, g, b; } colors[] = {
    /* red held at 0xff, green rising */
    { .r = 0xff, .g = 0x00, .b = 0x00 },
    { .r = 0xff, .g = 0x2f, .b = 0x00 },
    { .r = 0xff, .g = 0x5f, .b = 0x00 },
    { .r = 0xff, .g = 0x8f, .b = 0x00 },
    { .r = 0xff, .g = 0xbf, .b = 0x00 },
    { .r = 0xff, .g = 0xef, .b = 0x00 },
    /* green held at 0xff, red falling */
    { .r = 0xdf, .g = 0xff, .b = 0x00 },
    { .r = 0xaf, .g = 0xff, .b = 0x00 },
    { .r = 0x7f, .g = 0xff, .b = 0x00 },
    { .r = 0x4f, .g = 0xff, .b = 0x00 },
    { .r = 0x1f, .g = 0xff, .b = 0x00 },
    /* green held at 0xff, blue rising */
    { .r = 0x00, .g = 0xff, .b = 0x0f },
    { .r = 0x00, .g = 0xff, .b = 0x3f },
    { .r = 0x00, .g = 0xff, .b = 0x6f },
    { .r = 0x00, .g = 0xff, .b = 0x9f },
    { .r = 0x00, .g = 0xff, .b = 0xcf },
    { .r = 0x00, .g = 0xff, .b = 0xff },
    /* blue held at 0xff, green falling */
    { .r = 0x00, .g = 0xcf, .b = 0xff },
    { .r = 0x00, .g = 0x9f, .b = 0xff },
    { .r = 0x00, .g = 0x6f, .b = 0xff },
    { .r = 0x00, .g = 0x3f, .b = 0xff },
    { .r = 0x00, .g = 0x0f, .b = 0xff },
    /* blue held at 0xff, red rising */
    { .r = 0x1f, .g = 0x00, .b = 0xff },
    { .r = 0x4f, .g = 0x00, .b = 0xff },
    { .r = 0x7f, .g = 0x00, .b = 0xff },
    { .r = 0xaf, .g = 0x00, .b = 0xff },
    { .r = 0xdf, .g = 0x00, .b = 0xff },
    /* red held at 0xff, blue falling */
    { .r = 0xff, .g = 0x00, .b = 0xef },
    { .r = 0xff, .g = 0x00, .b = 0xbf },
    { .r = 0xff, .g = 0x00, .b = 0x8f },
    { .r = 0xff, .g = 0x00, .b = 0x5f },
    { .r = 0xff, .g = 0x00, .b = 0x2f },
};
/*
 * Planar pixel buffers, one byte per pixel per channel.  inc_hsv() reads
 * them with vec_ld, which ignores the low four address bits, so each plane
 * is explicitly aligned to 16 bytes.
 */
unsigned char red[65536] __attribute__((aligned(16)));
unsigned char green[65536] __attribute__((aligned(16)));
unsigned char blue[65536] __attribute__((aligned(16)));
int
main(void) {
struct timezone dontcare = { 0, 0 };
struct timeval before, after;
long long microsec;
int i, j = 0;
// set up arrays from our little rainbow
for (i = 0; i < 65536; ++i) {
red[i] = colors[j].r;
green[i] = colors[j].g;
blue[i] = colors[j].b;
++j;
if (j == (sizeof(colors) / sizeof(*colors))) {
j = 0;
}
}
for (j = 0; j < 32; j += 2)
printf("(%d) { %02x %02x %02x }\n", j, red[j], green[j], blue[j]);
gettimeofday(&before, &dontcare);
for (i = 0; i < 200; ++i) {
inc_hsv(65536, red, green, blue);
}
gettimeofday(&after, &dontcare);
microsec = (after.tv_usec - before.tv_usec) +
1000000 * (after.tv_sec - before.tv_sec);
for (j = 0; j < 32; j += 2)
printf("(%d) { %02x %02x %02x }\n", j, red[j], green[j], blue[j]);
printf("%lld microseconds\n", microsec);
return 0;
}
|