2003-09-13  Helvetix Victorinox  <helvetix@gimp.org>

	* gimp-composite-sse2-installer.c, gimp-composite-sse2-test.c:
	regenerated with new functions.

	* gimp-composite-sse.c, gimp-composite-sse2.c: Distinguish between
	64-bit and 128-bit constants with a little faux Hungarian notation.

	* gimp-composite-sse2.[ch]: Added implementations of
	addition_rgba8_rgba8_rgba8, subtract_rgba8_rgba8_rgba8, and
	swap_rgba8_rgba8_rgba8.

	* gimp-composite-generic.c: Some formatting beautification.
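The naming convention is easiest to see side by side: a _64 suffix marks an 8-byte constant destined for an MMX register (loaded with movq), while a _128 suffix marks a 16-byte constant destined for an SSE2 xmm register (loaded with movdqu). A minimal sketch of the idea, assuming GCC inline asm on 32-bit x86; the load_masks helper name is hypothetical:

/* Sketch only: _64 constants pair with 64-bit MMX loads, _128 constants
 * with 128-bit SSE2 loads.  Mirrors the prologue of
 * gimp_composite_subtract_rgba8_rgba8_rgba8_sse2 in the diff below. */
typedef unsigned int guint32;

const static guint32 rgba8_alpha_mask_64[2]  = { 0xFF000000, 0xFF000000 };
const static guint32 rgba8_alpha_mask_128[4] = { 0xFF000000, 0xFF000000,
                                                 0xFF000000, 0xFF000000 };

static void
load_masks (void)                  /* hypothetical helper name */
{
  asm volatile ("  movq    %0,%%mm0\n"
                "\tmovdqu  %1,%%xmm0\n"
                : /* no outputs */
                : "m" (*rgba8_alpha_mask_64), "m" (*rgba8_alpha_mask_128)
                : "%mm0", "%xmm0");
}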
ChangeLog
@@ -1,3 +1,19 @@
+2003-09-13  Helvetix Victorinox  <helvetix@gimp.org>
+
+	* gimp-composite-sse2-installer.c, gimp-composite-sse2-test.c:
+	regenerated with new functions.
+
+	* gimp-composite-sse.c, gimp-composite-sse2.c:
+	Distinguish between 64bit and 128bit constants with a little
+	faux hungarian notation.
+
+	* gimp-composite-sse2.[ch]: Added implementations of
+	addition_rgba8_rgba8_rgba8, subtract_rgba8_rgba8_rgba8, and
+	swap_rgba8_rgba8_rgba8
+
+	* gimp-composite-generic.c:
+	Some formating beautification
+
 2003-09-13  Maurits Rijk  <lpeek.mrijk@consunet.nl>
 
 	* plug-ins/imagemap/grid.xpm: removed
gimp-composite-generic.c
@@ -1116,7 +1116,9 @@ void
 gimp_composite_color_erase_any_any_any_generic (GimpCompositeContext *ctx)
 {
   ctx->D = ctx->B;
-  ctx->combine = (gimp_composite_pixel_alphap[ctx->pixelformat_A] && gimp_composite_pixel_alphap[ctx->pixelformat_B]) ? COLOR_ERASE_INTEN : 0;
+  ctx->combine = (gimp_composite_pixel_alphap[ctx->pixelformat_A] && gimp_composite_pixel_alphap[ctx->pixelformat_B])
+                 ? COLOR_ERASE_INTEN
+                 : 0;
 }
 
 
gimp-composite-sse.c
@@ -146,14 +146,14 @@
 		"\tpunpckhbw %%"#zero", %%"#dst"\n"
 
-const static guint32 rgba8_alpha_mask[2] = { 0xFF000000, 0xFF000000 };
-const static guint32 rgba8_b1[2] =         { 0x01010101, 0x01010101 };
-const static guint32 rgba8_b255[2] =       { 0xFFFFFFFF, 0xFFFFFFFF };
-const static guint32 rgba8_w1[2] =         { 0x00010001, 0x00010001 };
-const static guint32 rgba8_w2[2] =         { 0x00020002, 0x00020002 };
-const static guint32 rgba8_w128[2] =       { 0x00800080, 0x00800080 };
-const static guint32 rgba8_w256[2] =       { 0x01000100, 0x01000100 };
-const static guint32 rgba8_w255[2] =       { 0X00FF00FF, 0X00FF00FF };
+const static guint32 rgba8_alpha_mask_64[2] = { 0xFF000000, 0xFF000000 };
+const static guint32 rgba8_b1_64[2] =         { 0x01010101, 0x01010101 };
+const static guint32 rgba8_b255_64[2] =       { 0xFFFFFFFF, 0xFFFFFFFF };
+const static guint32 rgba8_w1_64[2] =         { 0x00010001, 0x00010001 };
+const static guint32 rgba8_w2_64[2] =         { 0x00020002, 0x00020002 };
+const static guint32 rgba8_w128_64[2] =       { 0x00800080, 0x00800080 };
+const static guint32 rgba8_w256_64[2] =       { 0x01000100, 0x01000100 };
+const static guint32 rgba8_w255_64[2] =       { 0X00FF00FF, 0X00FF00FF };
 
 const static guint32 va8_alpha_mask[2] = { 0xFF00FF00, 0xFF00FF00 };
 const static guint32 va8_b255[2] =       { 0xFFFFFFFF, 0xFFFFFFFF };
@@ -170,7 +170,7 @@ gimp_composite_addition_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
 
   asm volatile ("movq    %0,%%mm0"
                 : /* empty */
-                : "m" (*rgba8_alpha_mask)
+                : "m" (*rgba8_alpha_mask_64)
                 : "%mm0");
 
   for (; op.n_pixels >= 2; op.n_pixels -= 2)
@@ -265,7 +265,7 @@ gimp_composite_burn_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
 
                 "\tmovq      %%mm7,%2\n"
                 : /* empty */
-                : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_b255), "m" (*rgba8_w1), "m" (*rgba8_w255), "m" (*rgba8_alpha_mask)
+                : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_b255_64), "m" (*rgba8_w1_64), "m" (*rgba8_w255_64), "m" (*rgba8_alpha_mask_64)
                 : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4");
       op.A += 8;
       op.B += 8;
@@ -319,7 +319,7 @@ gimp_composite_burn_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
 
                 "\tmovd      %%mm7,%2\n"
                 : /* empty */
-                : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_b255), "m" (*rgba8_w1), "m" (*rgba8_w255), "m" (*rgba8_alpha_mask)
+                : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_b255_64), "m" (*rgba8_w1_64), "m" (*rgba8_w255_64), "m" (*rgba8_alpha_mask_64)
                 : "0", "1", "2", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
     }
 
@@ -365,7 +365,7 @@ gimp_composite_difference_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
 {
   GimpCompositeContext op = *_op;
 
-  asm volatile ("movq    %0,%%mm0" :  : "m" (*rgba8_alpha_mask) : "%mm0");
+  asm volatile ("movq    %0,%%mm0" :  : "m" (*rgba8_alpha_mask_64) : "%mm0");
 
   for (; op.n_pixels >= 2; op.n_pixels -= 2)
     {
@@ -421,7 +421,7 @@ gimp_composite_divide_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
   asm volatile ("movq    %0, %%mm0\n"
                 "\tmovq    %1, %%mm7\n"
                 :
-                : "m" (*rgba8_alpha_mask), "m" (*rgba8_w1)
+                : "m" (*rgba8_alpha_mask_64), "m" (*rgba8_w1_64)
                 : "%mm0", "%mm7");
 
   for (; op.n_pixels >= 2; op.n_pixels -= 2)
@@ -461,7 +461,7 @@ gimp_composite_divide_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
 
                 "\tmovq    %%mm3,%2\n"
                 : /* empty */
-                : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_alpha_mask)
+                : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_alpha_mask_64)
                 : "%eax", "%ecx", "%edx", "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
       op.A += 8;
       op.B += 8;
@@ -506,7 +506,7 @@ gimp_composite_divide_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
 
                 "\tmovd    %%mm3,%2\n"
                 : /* empty */
-                : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_alpha_mask)
+                : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_alpha_mask_64)
                 : "%eax", "%ecx", "%edx", "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
     }
 
@@ -554,7 +554,7 @@ gimp_composite_dodge_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
 
                 "\tmovq    %%mm7,%2\n"
                 : /* empty */
-                : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_w256), "m" (*rgba8_alpha_mask)
+                : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_w256_64), "m" (*rgba8_alpha_mask_64)
                 : "0", "1", "2", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
       op.A += 8;
       op.B += 8;
@@ -597,7 +597,7 @@ gimp_composite_dodge_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
 
                 "\tmovd    %%mm7,%2\n"
                 : /* empty */
-                : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_w256), "m" (*rgba8_alpha_mask)
+                : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_w256_64), "m" (*rgba8_alpha_mask_64)
                 : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
     }
 
@@ -609,9 +609,9 @@ gimp_composite_grain_extract_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
 {
   GimpCompositeContext op = *_op;
 
-  asm volatile ("movq    %0,%%mm0"     :  : "m" (*rgba8_alpha_mask) : "%mm0");
+  asm volatile ("movq    %0,%%mm0"     :  : "m" (*rgba8_alpha_mask_64) : "%mm0");
   asm volatile ("pxor    %%mm6,%%mm6"  :  :  : "%mm6");
-  asm volatile ("movq    %0,%%mm7"     :  : "m" (*rgba8_w128) : "%mm7");
+  asm volatile ("movq    %0,%%mm7"     :  : "m" (*rgba8_w128_64) : "%mm7");
 
   for (; op.n_pixels >= 2; op.n_pixels -= 2)
     {
@@ -688,7 +688,7 @@ gimp_composite_grain_merge_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
                 "pxor    %%mm6, %%mm6\n"
                 "movq    %1, %%mm7\n"
                 : /* empty */
-                : "m" (*rgba8_alpha_mask), "m" (*rgba8_w128)
+                : "m" (*rgba8_alpha_mask_64), "m" (*rgba8_w128_64)
                 : "%mm0", "%mm6", "%mm7");
 
   for (; op.n_pixels >= 2; op.n_pixels -= 2)
@@ -758,7 +758,7 @@ gimp_composite_lighten_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
 {
   GimpCompositeContext op = *_op;
 
-  asm volatile ("movq    %0,%%mm0" :  : "m" (*rgba8_alpha_mask) : "%mm0");
+  asm volatile ("movq    %0,%%mm0" :  : "m" (*rgba8_alpha_mask_64) : "%mm0");
 
   for (; op.n_pixels >= 2; op.n_pixels -= 2)
     {
@@ -808,8 +808,8 @@ gimp_composite_multiply_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
 {
   GimpCompositeContext op = *_op;
 
-  asm volatile ("movq    %0,%%mm0" :  : "m" (*rgba8_alpha_mask) : "%mm0");
-  asm volatile ("movq    %0,%%mm7" :  : "m" (*rgba8_w128) : "%mm7");
+  asm volatile ("movq    %0,%%mm0" :  : "m" (*rgba8_alpha_mask_64) : "%mm0");
+  asm volatile ("movq    %0,%%mm7" :  : "m" (*rgba8_w128_64) : "%mm7");
   asm volatile ("pxor    %%mm6,%%mm6" :  :  : "%mm6");
 
   for (; op.n_pixels >= 2; op.n_pixels -= 2)
@@ -916,7 +916,7 @@ sse_op_overlay(void)
                 "\tpor     %%mm3,%%mm1\n"
 
                 : /* empty */
-                : "m" (*rgba8_w2), "m" (*rgba8_alpha_mask)
+                : "m" (*rgba8_w2_64), "m" (*rgba8_alpha_mask_64)
                 );
 }
 
@@ -928,7 +928,7 @@ xxxgimp_composite_overlay_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
   asm volatile ("pxor    %%mm0,%%mm0\n"
                 "movq    %0,%%mm7"
                 : /* empty */
-                : "m" (*rgba8_w128) : "%mm0");
+                : "m" (*rgba8_w128_64) : "%mm0");
 
   for (; op.n_pixels >= 2; op.n_pixels -= 2)
     {
@@ -978,7 +978,7 @@ xxxgimp_composite_overlay_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
 
                 "\tmovq    %%mm1,%2\n"
                 : "+m" (*op.A), "+m" (*op.B), "+m" (*op.D)
-                : "m" (*rgba8_w2), "m" (*rgba8_alpha_mask)
+                : "m" (*rgba8_w2_64), "m" (*rgba8_alpha_mask_64)
                 : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4");
       op.A += 8;
       op.B += 8;
@@ -1033,7 +1033,7 @@ xxxgimp_composite_overlay_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
 
                 "\tmovd    %%mm1,%2\n"
                 : /* empty */
-                : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_w2), "m" (*rgba8_alpha_mask)
+                : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_w2_64), "m" (*rgba8_alpha_mask_64)
                 : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4");
     }
 
@@ -1057,7 +1057,7 @@ gimp_composite_scale_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
                 "\tpor     %%mm5,%%mm3\n"
                 "\tmovq    %1,%%mm7\n"
                 : /* empty */
-                : "m" (op.scale.scale), "m" (*rgba8_w128)
+                : "m" (op.scale.scale), "m" (*rgba8_w128_64)
                 : "%eax", "%mm0", "%mm5", "%mm6", "%mm7");
 
   for (; op.n_pixels >= 2; op.n_pixels -= 2)
@@ -1109,8 +1109,8 @@ gimp_composite_screen_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
 {
   GimpCompositeContext op = *_op;
 
-  asm volatile ("movq    %0,%%mm0" :  : "m" (*rgba8_alpha_mask) : "%mm0");
-  asm volatile ("movq    %0,%%mm7" :  : "m" (*rgba8_w128) : "%mm7");
+  asm volatile ("movq    %0,%%mm0" :  : "m" (*rgba8_alpha_mask_64) : "%mm0");
+  asm volatile ("movq    %0,%%mm7" :  : "m" (*rgba8_w128_64) : "%mm7");
   asm volatile ("pxor    %mm6, %mm6");
 
   for (; op.n_pixels >= 2; op.n_pixels -= 2)
@@ -1229,7 +1229,7 @@ gimp_composite_subtract_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op)
 {
   GimpCompositeContext op = *_op;
 
-  asm volatile ("movq    %0,%%mm0" :  : "m" (*rgba8_alpha_mask) : "%mm0");
+  asm volatile ("movq    %0,%%mm0" :  : "m" (*rgba8_alpha_mask_64) : "%mm0");
 
   for (; op.n_pixels >= 2; op.n_pixels -= 2)
     {
gimp-composite-sse2-installer.c
@@ -9,11 +9,30 @@
 
 #include "gimp-composite-sse2.h"
 
+static struct install_table {
+  GimpCompositeOperation mode;
+  GimpPixelFormat A;
+  GimpPixelFormat B;
+  GimpPixelFormat D;
+  void (*function)(GimpCompositeContext *);
+} _gimp_composite_sse2[] = {
+#if (__GNUC__ >= 3) && defined(USE_SSE) && defined(ARCH_X86)
+  { GIMP_COMPOSITE_DIFFERENCE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_difference_rgba8_rgba8_rgba8_sse2 },
+  { GIMP_COMPOSITE_ADDITION, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_addition_rgba8_rgba8_rgba8_sse2 },
+  { GIMP_COMPOSITE_SUBTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_subtract_rgba8_rgba8_rgba8_sse2 },
+  { GIMP_COMPOSITE_SWAP, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_swap_rgba8_rgba8_rgba8_sse2 },
+#endif
+  { 0, 0, 0, 0, NULL }
+};
+
 void
 gimp_composite_sse2_install (void)
 {
-  /* nothing to do */
+  static struct install_table *t = _gimp_composite_sse2;
+
+  for (t = &_gimp_composite_sse2[0]; t->function != NULL; t++) {
+    gimp_composite_function[t->mode][t->A][t->B][t->D] = t->function;
+  }
+
+  gimp_composite_sse2_init ();
 }
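The regenerated installer replaces the old empty stub with a dispatch-table walk: each row names a compositing mode, the three pixel formats, and the specialized function, and installation just copies every row into the global jump table until it hits the NULL sentinel. A self-contained model of the same pattern, with simplified type and table names and assumed table sizes:

#include <stddef.h>

enum { MODE_ADDITION, MODE_SUBTRACT, N_MODES };
enum { FMT_RGBA8, N_FORMATS };

typedef void (*composite_fn) (void *ctx);

/* Global jump table, indexed the same way GIMP indexes
 * gimp_composite_function[mode][A][B][D]. */
static composite_fn composite_function[N_MODES][N_FORMATS][N_FORMATS][N_FORMATS];

static void addition_rgba8 (void *ctx) { (void) ctx; /* ... */ }

static struct install_table {
  int mode, A, B, D;
  composite_fn function;
} table[] = {
  { MODE_ADDITION, FMT_RGBA8, FMT_RGBA8, FMT_RGBA8, addition_rgba8 },
  { 0, 0, 0, 0, NULL }          /* sentinel row ends the walk */
};

void
install (void)
{
  struct install_table *t;

  for (t = &table[0]; t->function != NULL; t++)
    composite_function[t->mode][t->A][t->B][t->D] = t->function;
}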
gimp-composite-sse2-test.c
@@ -56,6 +56,42 @@ gimp_composite_sse2_test (int iterations, int n_pixels)
       va8M[i].a = i;
     }
 
+
+  gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_DIFFERENCE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
+  gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_DIFFERENCE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
+  ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
+  ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_difference_rgba8_rgba8_rgba8_sse2, &special_ctx);
+  if (gimp_composite_regression_compare_contexts ("difference", &generic_ctx, &special_ctx)) {
+    return (1);
+  }
+  gimp_composite_regression_timer_report ("difference", ft0, ft1);
+
+  gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_ADDITION, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
+  gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_ADDITION, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
+  ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
+  ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_addition_rgba8_rgba8_rgba8_sse2, &special_ctx);
+  if (gimp_composite_regression_compare_contexts ("addition", &generic_ctx, &special_ctx)) {
+    return (1);
+  }
+  gimp_composite_regression_timer_report ("addition", ft0, ft1);
+
+  gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_SUBTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
+  gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_SUBTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
+  ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
+  ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_subtract_rgba8_rgba8_rgba8_sse2, &special_ctx);
+  if (gimp_composite_regression_compare_contexts ("subtract", &generic_ctx, &special_ctx)) {
+    return (1);
+  }
+  gimp_composite_regression_timer_report ("subtract", ft0, ft1);
+
+  gimp_composite_context_init (&special_ctx, GIMP_COMPOSITE_SWAP, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D2);
+  gimp_composite_context_init (&generic_ctx, GIMP_COMPOSITE_SWAP, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, n_pixels, (unsigned char *) rgba8A, (unsigned char *) rgba8B, (unsigned char *) rgba8B, (unsigned char *) rgba8D1);
+  ft0 = gimp_composite_regression_time_function (iterations, gimp_composite_dispatch, &generic_ctx);
+  ft1 = gimp_composite_regression_time_function (iterations, gimp_composite_swap_rgba8_rgba8_rgba8_sse2, &special_ctx);
+  if (gimp_composite_regression_compare_contexts ("swap", &generic_ctx, &special_ctx)) {
+    return (1);
+  }
+  gimp_composite_regression_timer_report ("swap", ft0, ft1);
+
 #endif
   return (0);
 }
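Each regenerated test block follows the same shape: run the generic dispatcher and the specialized SSE2 kernel over identical inputs, fail if the two destination buffers diverge, and report both timings. A rough stand-alone model of that harness; the helper names here are stand-ins for the GIMP regression API, not the real signatures:

#include <stdio.h>
#include <string.h>
#include <time.h>

typedef void (*composite_fn) (unsigned char *A, unsigned char *B,
                              unsigned char *D, int n_pixels);

/* Time one implementation over `iterations` runs; a stand-in for
 * gimp_composite_regression_time_function(). */
static double
time_function (int iterations, composite_fn f,
               unsigned char *A, unsigned char *B,
               unsigned char *D, int n_pixels)
{
  clock_t t0 = clock ();
  for (int i = 0; i < iterations; i++)
    f (A, B, D, n_pixels);
  return (double) (clock () - t0) / CLOCKS_PER_SEC;
}

/* A test fails when the two destination buffers differ; a stand-in
 * for gimp_composite_regression_compare_contexts(). */
static int
compare (const char *name, unsigned char *D1, unsigned char *D2, size_t len)
{
  if (memcmp (D1, D2, len) != 0)
    {
      fprintf (stderr, "%s: specialized result differs from generic\n", name);
      return 1;
    }
  return 0;
}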
gimp-composite-sse2.c
@@ -41,9 +41,35 @@
 #define pmaxub(src,dst,tmp) "pmaxub " "%%" #src ", %%" #dst
 
+const static guint32 rgba8_alpha_mask_128[4] = { 0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000 };
+const static guint32 rgba8_b1_128[4] =         { 0x01010101, 0x01010101, 0x01010101, 0x01010101 };
+const static guint32 rgba8_b255_128[4] =       { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF };
+const static guint32 rgba8_w1_128[4] =         { 0x00010001, 0x00010001, 0x00010001, 0x00010001 };
+const static guint32 rgba8_w2_128[4] =         { 0x00020002, 0x00020002, 0x00020002, 0x00020002 };
+const static guint32 rgba8_w128_128[4] =       { 0x00800080, 0x00800080, 0x00800080, 0x00800080 };
+const static guint32 rgba8_w256_128[4] =       { 0x01000100, 0x01000100, 0x01000100, 0x01000100 };
+const static guint32 rgba8_w255_128[4] =       { 0X00FF00FF, 0X00FF00FF, 0X00FF00FF, 0X00FF00FF };
+
+const static guint32 va8_alpha_mask_128[4] = { 0xFF00FF00, 0xFF00FF00, 0xFF00FF00, 0xFF00FF00 };
+const static guint32 va8_b255_128[4] =       { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF };
+const static guint32 va8_w1_128[4] =         { 0x00010001, 0x00010001, 0x00010001, 0x00010001 };
+const static guint32 va8_w255_128[4] =       { 0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF };
+
+const static guint32 rgba8_alpha_mask_64[2] = { 0xFF000000, 0xFF000000 };
+const static guint32 rgba8_b1_64[2] =         { 0x01010101, 0x01010101 };
+const static guint32 rgba8_b255_64[2] =       { 0xFFFFFFFF, 0xFFFFFFFF };
+const static guint32 rgba8_w1_64[2] =         { 0x00010001, 0x00010001 };
+const static guint32 rgba8_w2_64[2] =         { 0x00020002, 0x00020002 };
+const static guint32 rgba8_w128_64[2] =       { 0x00800080, 0x00800080 };
+const static guint32 rgba8_w256_64[2] =       { 0x01000100, 0x01000100 };
+const static guint32 rgba8_w255_64[2] =       { 0X00FF00FF, 0X00FF00FF };
+
+const static guint32 va8_alpha_mask_64[2] = { 0xFF00FF00, 0xFF00FF00 };
+const static guint32 va8_b255_64[2] =       { 0xFFFFFFFF, 0xFFFFFFFF };
+const static guint32 va8_w1_64[2] =         { 0x00010001, 0x00010001 };
+const static guint32 va8_w255_64[2] =       { 0x00FF00FF, 0x00FF00FF };
+
 void
-debug_display_sse(void)
+debug_display_sse (void)
 {
 #define mask32(x) ((x)& (unsigned long long) 0xFFFFFFFF)
 #define print128(reg) { \
@@ -61,7 +87,7 @@ debug_display_sse(void)
 }
 
 void
-xxxgimp_composite_addition_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
+gimp_composite_addition_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
 {
   GimpCompositeContext op = *_op;
 
@@ -91,6 +117,31 @@ xxxgimp_composite_addition_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
       op.D += 16;
     }
 
+  asm volatile ("movq    %0,%%mm0"
+                : /* empty */
+                : "m" (*rgba8_alpha_mask_64)
+                : "%mm0");
+
+  for (; op.n_pixels >= 2; op.n_pixels -= 2)
+    {
+      asm ("  movq    %0, %%mm2\n"
+           "\tmovq    %1, %%mm3\n"
+           "\tmovq    %%mm2, %%mm4\n"
+           "\tpaddusb %%mm3, %%mm4\n"
+           "\tmovq    %%mm0, %%mm1\n"
+           "\tpandn   %%mm4, %%mm1\n"
+           "\t" pminub(mm3, mm2, mm4) "\n"
+           "\tpand    %%mm0, %%mm2\n"
+           "\tpor     %%mm2, %%mm1\n"
+           "\tmovq    %%mm1, %2\n"
+           : /* empty */
+           : "m" (*op.A), "m" (*op.B), "m" (*op.D)
+           : "0", "1", "2", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
+      op.A += 8;
+      op.B += 8;
+      op.D += 8;
+    }
+
   if (op.n_pixels)
     {
       asm volatile ("  movd (%0), %%mm2;\n"
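The new MMX two-pixel loop relies on paddusb, which adds packed unsigned bytes with saturation, so channel sums clamp at 255 instead of wrapping; pminub then keeps the smaller of the two alpha bytes. The per-byte arithmetic, as a plain-C sketch:

/* What paddusb does to each byte: unsigned add that clamps at 255. */
static unsigned char
add_saturate_u8 (unsigned char a, unsigned char b)
{
  unsigned int sum = (unsigned int) a + (unsigned int) b;
  return (sum > 255) ? 255 : (unsigned char) sum;
}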
@@ -111,6 +162,311 @@ xxxgimp_composite_addition_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
   asm("emms");
 }
 
+
+void
+xxxgimp_composite_burn_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
+{
+
+}
+
+
+void
+xxxgimp_composite_darken_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
+{
+
+}
+
+void
+gimp_composite_difference_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
+{
+  GimpCompositeContext op = *_op;
+
+  asm volatile ("  movq    %0,%%mm0\n"
+                "\tmovdqu  %1,%%xmm0"
+                :
+                : "m" (*rgba8_alpha_mask_64), "m" (*rgba8_alpha_mask_128)
+                : "%mm0");
+
+  for (; op.n_pixels >= 4; op.n_pixels -= 4)
+    {
+      asm volatile ("  movdqu       %0,%%xmm2\n"
+                    "\tmovdqu       %1,%%xmm3\n"
+                    "\tmovdqu   %%xmm2,%%xmm4\n"
+                    "\tmovdqu   %%xmm3,%%xmm5\n"
+                    "\tpsubusb  %%xmm3,%%xmm4\n"
+                    "\tpsubusb  %%xmm2,%%xmm5\n"
+                    "\tpaddb    %%xmm5,%%xmm4\n"
+                    "\tmovdqu   %%xmm0,%%xmm1\n"
+                    "\tpandn    %%xmm4,%%xmm1\n"
+                    "\tpminub   %%xmm3,%%xmm2\n"
+                    "\tpand     %%xmm0,%%xmm2\n"
+                    "\tpor      %%xmm2,%%xmm1\n"
+                    "\tmovdqu   %%xmm1,%2\n"
+                    : /* empty */
+                    : "m" (*op.A), "m" (*op.B), "m" (*op.D)
+                    : "0", "1", "2", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5");
+      op.A += 16;
+      op.B += 16;
+      op.D += 16;
+    }
+
+  for (; op.n_pixels >= 2; op.n_pixels -= 2)
+    {
+      asm volatile ("  movq    %0, %%mm2\n"
+                    "\tmovq    %1, %%mm3\n"
+                    "\tmovq    %%mm2, %%mm4\n"
+                    "\tmovq    %%mm3, %%mm5\n"
+                    "\tpsubusb %%mm3, %%mm4\n"
+                    "\tpsubusb %%mm2, %%mm5\n"
+                    "\tpaddb   %%mm5, %%mm4\n"
+                    "\tmovq    %%mm0, %%mm1\n"
+                    "\tpandn   %%mm4, %%mm1\n"
+                    "\tpminub  %%mm3, %%mm2\n"
+                    "\tpand    %%mm0, %%mm2\n"
+                    "\tpor     %%mm2, %%mm1\n"
+                    "\tmovq    %%mm1, %2\n"
+                    : /* empty */
+                    : "m" (*op.A), "m" (*op.B), "m" (*op.D)
+                    : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+      op.A += 8;
+      op.B += 8;
+      op.D += 8;
+    }
+
+  if (op.n_pixels)
+    {
+      asm volatile ("  movd    %0, %%mm2\n"
+                    "\tmovd    %1, %%mm3\n"
+                    "\tmovq    %%mm2, %%mm4\n"
+                    "\tmovq    %%mm3, %%mm5\n"
+                    "\tpsubusb %%mm3, %%mm4\n"
+                    "\tpsubusb %%mm2, %%mm5\n"
+                    "\tpaddb   %%mm5, %%mm4\n"
+                    "\tmovq    %%mm0, %%mm1\n"
+                    "\tpandn   %%mm4, %%mm1\n"
+                    "\tpminub  %%mm3, %%mm2\n"
+                    "\tpand    %%mm0, %%mm2\n"
+                    "\tpor     %%mm2, %%mm1\n"
+                    "\tmovd    %%mm1, %2\n"
+                    : /* empty */
+                    : "m" (*op.A), "m" (*op.B), "m" (*op.D)
+                    : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4");
+    }
+
+  asm("emms");
+}
+
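The difference kernel computes an absolute difference without any signed arithmetic: psubusb saturates negative results to zero, so |A - B| falls out of two subtractions and an add, since at most one of the two terms is nonzero. Per byte, the equivalent C:

/* |a - b| for unsigned bytes, the psubusb/psubusb/paddb way. */
static unsigned char
abs_difference_u8 (unsigned char a, unsigned char b)
{
  unsigned char d1 = (a > b) ? (unsigned char) (a - b) : 0;  /* psubusb b,a */
  unsigned char d2 = (b > a) ? (unsigned char) (b - a) : 0;  /* psubusb a,b */
  return (unsigned char) (d1 + d2);     /* paddb: one term is always 0 */
}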
+void
+xxxgimp_composite_divide_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
+{
+}
+
+void
+xxxgimp_composite_dodge_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
+{
+}
+
+void
+xxxgimp_composite_grain_extract_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
+{
+
+}
+
+void
+xxxgimp_composite_grain_merge_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
+{
+
+}
+
+void
+xxxgimp_composite_lighten_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
+{
+}
+
+void
+xxxgimp_composite_multiply_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
+{
+}
+
+static void
+sse2_op_overlay(void)
+{
+}
+
+void
+xxxgimp_composite_overlay_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
+{
+}
+
+
+void
+xxxgimp_composite_scale_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
+{
+}
+
+void
+xxxgimp_composite_screen_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
+{
+}
+
+
+void
+gimp_composite_subtract_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
+{
+  GimpCompositeContext op = *_op;
+
+  asm volatile ("  movq    %0,%%mm0\n"
+                "\tmovdqu  %1,%%xmm0\n"
+                : /* empty */
+                : "m" (*rgba8_alpha_mask_64), "m" (*rgba8_alpha_mask_128)
+                : "%mm0", "%xmm0");
+
+  for (; op.n_pixels >= 4; op.n_pixels -= 4)
+    {
+      asm volatile ("  movdqu      %0,%%xmm2\n"
+                    "\tmovdqu      %1,%%xmm3\n"
+                    "\tmovdqu  %%xmm2,%%xmm4\n"
+                    "\tpsubusb %%xmm3,%%xmm4\n"
+
+                    "\tmovdqu  %%xmm0,%%xmm1\n"
+                    "\tpandn   %%xmm4,%%xmm1\n"
+                    "\t" pminub(xmm3,xmm2,xmm4) "\n"
+                    "\tpand    %%xmm0,%%xmm2\n"
+                    "\tpor     %%xmm2,%%xmm1\n"
+                    "\tmovdqu  %%xmm1,%2\n"
+                    : /* empty */
+                    : "m" (*op.A), "m" (*op.B), "m" (*op.D)
+                    : "0", "1", "2", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5");
+      op.A += 16;
+      op.B += 16;
+      op.D += 16;
+    }
+
+  for (; op.n_pixels >= 2; op.n_pixels -= 2)
+    {
+      asm volatile ("  movq    %0,%%mm2\n"
+                    "\tmovq    %1,%%mm3\n"
+
+                    "\tmovq    %%mm2,%%mm4\n"
+                    "\tpsubusb %%mm3,%%mm4\n"
+
+                    "\tmovq    %%mm0,%%mm1\n"
+                    "\tpandn   %%mm4,%%mm1\n"
+
+                    "\t" pminub(mm3,mm2,mm4) "\n"
+
+                    "\tpand    %%mm0,%%mm2\n"
+                    "\tpor     %%mm2,%%mm1\n"
+                    "\tmovq    %%mm1,%2\n"
+                    : /* empty */
+                    : "m" (*op.A), "m" (*op.B), "m" (*op.D)
+                    : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+      op.A += 8;
+      op.B += 8;
+      op.D += 8;
+    }
+
+  if (op.n_pixels)
+    {
+      asm volatile ("  movd    %0,%%mm2\n"
+                    "\tmovd    %1,%%mm3\n"
+
+                    "\tmovq    %%mm2,%%mm4\n"
+                    "\tpsubusb %%mm3,%%mm4\n"
+
+                    "\tmovq    %%mm0,%%mm1\n"
+                    "\tpandn   %%mm4,%%mm1\n"
+
+                    "\t" pminub(mm3,mm2,mm4) "\n"
+
+                    "\tpand    %%mm0,%%mm2\n"
+                    "\tpor     %%mm2,%%mm1\n"
+                    "\tmovd    %%mm1,%2\n"
+                    : /* empty */
+                    : "m" (*op.A), "m" (*op.B), "m" (*op.D)
+                    : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+    }
+
+  asm("emms");
+}
+
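Both new kernels end with the same alpha-merge idiom: the computed color bytes pass through pandn (result AND NOT mask), the alpha byte is taken as the byte-wise minimum of the two source alphas (pminub) and isolated with pand, and por recombines them. A simplified scalar model of the idiom for one rgba8 pixel, sketch only:

typedef unsigned int guint32;

/* Merge computed color channels with min(alpha_A, alpha_B) through the
 * 0xFF000000 alpha mask, as the pandn/pminub/pand/por sequence does. */
static guint32
merge_alpha (guint32 result, guint32 A, guint32 B)
{
  const guint32 mask  = 0xFF000000;               /* rgba8_alpha_mask     */
  guint32       color = result & ~mask;           /* pandn mask,result    */
  guint32       a_min = (A & mask) < (B & mask)   /* pminub on alpha byte */
                          ? (A & mask) : (B & mask);
  return color | a_min;                           /* pand + por           */
}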
+void
+gimp_composite_swap_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *_op)
+{
+  GimpCompositeContext op = *_op;
+
+  /*
+   * Inhale one whole i686 cache line at once.  64 bytes, 16 rgba8 pixels, 4 128 bit xmm registers.
+   */
+  for (; op.n_pixels >= 16; op.n_pixels -= 16)
+    {
+      asm volatile ("  movdqu %0,%%xmm0\n"
+                    "\tmovdqu %1,%%xmm1\n"
+                    "\tmovdqu %2,%%xmm2\n"
+                    "\tmovdqu %3,%%xmm3\n"
+                    "\tmovdqu %4,%%xmm4\n"
+                    "\tmovdqu %5,%%xmm5\n"
+                    "\tmovdqu %6,%%xmm6\n"
+                    "\tmovdqu %7,%%xmm7\n"
+
+                    "\tmovdqu %%xmm0,%1\n"
+                    "\tmovdqu %%xmm1,%0\n"
+                    "\tmovdqu %%xmm2,%3\n"
+                    "\tmovdqu %%xmm3,%2\n"
+                    "\tmovdqu %%xmm4,%5\n"
+                    "\tmovdqu %%xmm5,%4\n"
+                    "\tmovdqu %%xmm6,%7\n"
+                    "\tmovdqu %%xmm7,%6\n"
+                    : /* empty */
+                    : "m" (op.A[0]), "m" (op.B[0]),
+                      "m" (op.A[1]), "m" (op.B[1]),
+                      "m" (op.A[2]), "m" (op.B[2]),
+                      "m" (op.A[3]), "m" (op.B[3])
+                    : "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7");
+      op.A += 64;
+      op.B += 64;
+    }
+
+  for (; op.n_pixels >= 4; op.n_pixels -= 4)
+    {
+      asm volatile ("  movdqu %0,%%xmm2\n"
+                    "\tmovdqu %1,%%xmm3\n"
+                    "\tmovdqu %%xmm3,%0\n"
+                    "\tmovdqu %%xmm2,%1\n"
+                    : /* empty */
+                    : "m" (*op.A), "m" (*op.B)
+                    : "0", "1", "%xmm1", "%xmm2", "%xmm3", "%xmm4");
+      op.A += 16;
+      op.B += 16;
+    }
+
+  for (; op.n_pixels >= 2; op.n_pixels -= 2)
+    {
+      asm volatile ("  movq %0,%%mm2\n"
+                    "\tmovq %1,%%mm3\n"
+                    "\tmovq %%mm3,%0\n"
+                    "\tmovq %%mm2,%1\n"
+                    : /* empty */
+                    : "m" (*op.A), "m" (*op.B)
+                    : "0", "1", "%mm1", "%mm2", "%mm3", "%mm4");
+      op.A += 8;
+      op.B += 8;
+    }
+
+  if (op.n_pixels)
+    {
+      asm volatile ("  movd %0,%%mm2\n"
+                    "\tmovd %1,%%mm3\n"
+                    "\tmovd %%mm3,%0\n"
+                    "\tmovd %%mm2,%1\n"
+                    : /* empty */
+                    : "m" (*op.A), "m" (*op.B)
+                    : "0", "1", "%mm1", "%mm2", "%mm3", "%mm4");
+    }
+
+  asm("emms");
+}
+
 #endif /* __GNUC__ > 3 */
 #endif /* defined(ARCH_X86) */
 #endif /* defined(USE_SSE) */
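gimp_composite_swap_rgba8_rgba8_rgba8_sse2 is a pure exchange: the widest loop pulls a full 64-byte i686 cache line of A and B into four xmm register pairs and writes each half back to the other buffer, and progressively narrower loops mop up the tail. The same structure in plain C, with a byte buffer standing in for the register pairs; sketch only:

#include <stddef.h>
#include <string.h>

/* Exchange two pixel spans a cache line at a time; tmp plays the
 * role of the four xmm register pairs in the SSE2 version. */
static void
swap_span (unsigned char *A, unsigned char *B, size_t n_bytes)
{
  unsigned char tmp[64];

  for (; n_bytes >= 64; n_bytes -= 64, A += 64, B += 64)
    {
      memcpy (tmp, A, 64);
      memcpy (A, B, 64);
      memcpy (B, tmp, 64);
    }
  for (; n_bytes > 0; n_bytes--, A++, B++)   /* tail, byte at a time */
    {
      unsigned char t = *A; *A = *B; *B = t;
    }
}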
gimp-composite-sse2.h
@@ -10,4 +10,7 @@ extern void gimp_composite_sse2_init (void);
 extern void gimp_composite_sse2_install (void);
 
+extern void gimp_composite_addition_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *);
 extern void gimp_composite_difference_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *);
+extern void gimp_composite_subtract_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *);
+extern void gimp_composite_swap_rgba8_rgba8_rgba8_sse2 (GimpCompositeContext *);
 #endif