From 54630be219682851fadef9cd4ce67b0c579d74df Mon Sep 17 00:00:00 2001
From: Helvetix Victorinox
Date: Tue, 20 Jul 2004 15:59:12 +0000
Subject: [PATCH] Print the list of active instruction sets if the --verbose command line

* app/composite/gimp-composite.c (gimp_composite_init): Print the
list of active instruction sets if the --verbose command line
switch is ON (via be_verbose)

* app/composite/gimp-composite-x86.h: Factored code from the mmx
and sse implementations.

* app/composite/make-installer.py: Raised the number of test
iterations from 1 to 10.

* app/composite/gimp-composite-3dnow.[ch]
* app/composite/gimp-composite-3dnow-test.c
* app/composite/gimp-composite-3dnow-installer.c
* app/composite/gimp-composite-altivec.[ch]
* app/composite/gimp-composite-altivec-test.c
* app/composite/gimp-composite-altivec-installer.c
* app/composite/gimp-composite-mmx.[ch]
* app/composite/gimp-composite-mmx-test.c
* app/composite/gimp-composite-mmx-installer.c
* app/composite/gimp-composite-sse.[ch]
* app/composite/gimp-composite-sse-test.c
* app/composite/gimp-composite-sse-installer.c
* app/composite/gimp-composite-sse2.[ch]
* app/composite/gimp-composite-sse2-test.c
* app/composite/gimp-composite-sse2-installer.c
* app/composite/gimp-composite-vis.[ch]
* app/composite/gimp-composite-vis-test.c:
Regenerated sources via make-installer.py
---
 ChangeLog                                     |  32 +
 app/composite/gimp-composite-3dnow-test.c     |   4 +-
 app/composite/gimp-composite-3dnow.c          |  13 +-
 app/composite/gimp-composite-3dnow.h          |  12 +
 app/composite/gimp-composite-altivec-test.c   |   4 +-
 app/composite/gimp-composite-altivec.c        |   8 +-
 app/composite/gimp-composite-altivec.h        |   6 +
 app/composite/gimp-composite-mmx-installer.c  |   2 +-
 app/composite/gimp-composite-mmx-test.c       |   4 +-
 app/composite/gimp-composite-mmx.c            | 550 ++++++++++--------
 app/composite/gimp-composite-sse-installer.c  |   2 +-
 app/composite/gimp-composite-sse-test.c       |   4 +-
 app/composite/gimp-composite-sse.c            | 169 +++---
 app/composite/gimp-composite-sse2-installer.c |   2 +-
 app/composite/gimp-composite-sse2-test.c      |   4 +-
 app/composite/gimp-composite-vis-test.c       |   4 +-
 app/composite/gimp-composite-vis.c            |  10 +-
 app/composite/gimp-composite-vis.h            |   8 +
 app/composite/gimp-composite-x86.h            |  21 +-
 app/composite/gimp-composite.c                |  28 +-
 app/composite/make-installer.py               |   2 +-
 21 files changed, 505 insertions(+), 384 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index ea13018899..e14e215877 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,35 @@
+2004-07-20  Helvetix Victorinox
+
+	* app/composite/gimp-composite.c (gimp_composite_init): Print the
+	list of active instruction sets if the --verbose command line
+	switch is ON (via be_verbose)
+
+	* app/composite/gimp-composite-x86.h: Factored code from the mmx
+	and sse implementations.
+
+	* app/composite/make-installer.py: Raised the number of test
+	iterations from 1 to 10.
+
+	* app/composite/gimp-composite-3dnow.[ch]
+	* app/composite/gimp-composite-3dnow-test.c
+	* app/composite/gimp-composite-3dnow-installer.c
+	* app/composite/gimp-composite-altivec.[ch]
+	* app/composite/gimp-composite-altivec-test.c
+	* app/composite/gimp-composite-altivec-installer.c
+	* app/composite/gimp-composite-mmx.[ch]
+	* app/composite/gimp-composite-mmx-test.c
+	* app/composite/gimp-composite-mmx-installer.c
+	* app/composite/gimp-composite-sse.[ch]
+	* app/composite/gimp-composite-sse-test.c
+	* app/composite/gimp-composite-sse-installer.c
+	* app/composite/gimp-composite-sse2.[ch]
+	* app/composite/gimp-composite-sse2-test.c
+	* app/composite/gimp-composite-sse2-installer.c
+	* app/composite/gimp-composite-vis.[ch]
+	* app/composite/gimp-composite-vis-test.c:
+	Regenerated sources via make-installer.py
+
 2004-07-20  Sven Neumann
 
 	* app/app_procs.c

diff --git a/app/composite/gimp-composite-3dnow-test.c b/app/composite/gimp-composite-3dnow-test.c
index af465840a3..1a553c163e 100644
--- a/app/composite/gimp-composite-3dnow-test.c
+++ b/app/composite/gimp-composite-3dnow-test.c
@@ -19,7 +19,7 @@
 int
 gimp_composite_3dnow_test (int iterations, int n_pixels)
 {
-#if (__GNUC__ >= 3) && defined(USE_3DNOW) && defined(ARCH_X86) && (defined(ARCH_X86_64) || !defined(PIC))
+#if defined(COMPILE_3DNOW_IS_OKAY)
   GimpCompositeContext generic_ctx;
   GimpCompositeContext special_ctx;
   double ft0;
@@ -78,7 +78,7 @@ main (int argc, char *argv[])
 
   putenv ("GIMP_COMPOSITE=0x1");
 
-  iterations = 1;
+  iterations = 10;
   n_pixels = 1048593;
 
   argv++, argc--;

diff --git a/app/composite/gimp-composite-3dnow.c b/app/composite/gimp-composite-3dnow.c
index 8be15b8415..5dafd351a6 100644
--- a/app/composite/gimp-composite-3dnow.c
+++ b/app/composite/gimp-composite-3dnow.c
@@ -31,22 +31,17 @@
 #include "base/cpu-accel.h"
 
 #include "gimp-composite.h"
+
 #include "gimp-composite-3dnow.h"
 
-#if defined(USE_MMX)
-#if defined(ARCH_X86)
-#if __GNUC__ >= 3
-#if defined(ARCH_X86_64) || !defined(PIC)
+#ifdef COMPILE_3DNOW_IS_OKAY
 
-#endif /* ARCH_X86_64 || !PIC */
-#endif /* __GNUC__ > 3 */
-#endif /* ARCH_X86 */
-#endif /* USE_MMX */
+#endif
 
 gboolean
 gimp_composite_3dnow_init (void)
 {
-#if defined(USE_MMX) && defined(ARCH_X86) && (defined(ARCH_X86_64) || !defined(PIC))
+#ifdef COMPILE_3DNOW_IS_OKAY
   if (cpu_accel () & CPU_ACCEL_X86_3DNOW)
     {
       return (TRUE);

diff --git a/app/composite/gimp-composite-3dnow.h b/app/composite/gimp-composite-3dnow.h
index 699fadc0cf..91eeb470da 100644
--- a/app/composite/gimp-composite-3dnow.h
+++ b/app/composite/gimp-composite-3dnow.h
@@ -9,4 +9,16 @@ extern gboolean gimp_composite_3dnow_init (void);
  */
 extern gboolean gimp_composite_3dnow_install (void);
 
+#if !defined(__INTEL_COMPILER)
+#if defined(USE_MMX)
+#if defined(ARCH_X86)
+#if __GNUC__ >= 3
+#if defined(ARCH_X86_64) || !defined(PIC)
+#define COMPILE_3DNOW_IS_OKAY (1)
+#endif /* ARCH_X86_64 || !PIC */
+#endif /* __GNUC__ > 3 */
+#endif /* ARCH_X86 */
+#endif /* USE_MMX */
+#endif /* !defined(__INTEL_COMPILER) */
+
 #endif

diff --git a/app/composite/gimp-composite-altivec-test.c b/app/composite/gimp-composite-altivec-test.c
index 3fb3a61abf..3e0ab7e4a1 100644
--- a/app/composite/gimp-composite-altivec-test.c
+++ b/app/composite/gimp-composite-altivec-test.c
@@ -19,7 +19,7 @@
 int
 gimp_composite_altivec_test (int iterations, int n_pixels)
 {
-#if (__GNUC__ >= 3) && defined(USE_ALTIVEC) && defined(ARCH_PPC)
+#if defined(COMPILE_ALTIVEC_IS_OKAY)
   GimpCompositeContext generic_ctx;
   GimpCompositeContext special_ctx;
   double ft0;
@@ -78,7 +78,7 @@ main
(int argc, char *argv[]) putenv ("GIMP_COMPOSITE=0x1"); - iterations = 1; + iterations = 10; n_pixels = 1048593; argv++, argc--; diff --git a/app/composite/gimp-composite-altivec.c b/app/composite/gimp-composite-altivec.c index 7ad1652ccd..cdb6471046 100644 --- a/app/composite/gimp-composite-altivec.c +++ b/app/composite/gimp-composite-altivec.c @@ -30,16 +30,14 @@ #include "gimp-composite.h" #include "gimp-composite-altivec.h" -#ifdef ARCH_PPC -#if __GNUC__ >= 3 +#ifdef COMPILE_ALTIVEC_IS_OKAY -#endif /* __GNUC__ > 3 */ -#endif /* ARCH_PPC */ +#endif gboolean gimp_composite_altivec_init (void) { -#ifdef ARCH_PPC +#ifdef COMPILE_ALTIVEC_IS_OKAY if (cpu_accel () & CPU_ACCEL_PPC_ALTIVEC) { return (TRUE); diff --git a/app/composite/gimp-composite-altivec.h b/app/composite/gimp-composite-altivec.h index e376763f14..4875bbc2f6 100644 --- a/app/composite/gimp-composite-altivec.h +++ b/app/composite/gimp-composite-altivec.h @@ -9,4 +9,10 @@ extern gboolean gimp_composite_altivec_init (void); */ extern gboolean gimp_composite_altivec_install (void); +#ifdef ARCH_PPC +#if __GNUC__ >= 3 +#define COMPILE_ALTIVEC_IS_OKAY (1) +#endif /* __GNUC__ > 3 */ +#endif /* ARCH_PPC */ + #endif diff --git a/app/composite/gimp-composite-mmx-installer.c b/app/composite/gimp-composite-mmx-installer.c index 30e8d89ac2..c36ff9c147 100644 --- a/app/composite/gimp-composite-mmx-installer.c +++ b/app/composite/gimp-composite-mmx-installer.c @@ -16,7 +16,7 @@ static struct install_table { GimpPixelFormat D; void (*function)(GimpCompositeContext *); } _gimp_composite_mmx[] = { -#if (__GNUC__ >= 3) && defined(USE_MMX) && defined(ARCH_X86) && (defined(ARCH_X86_64) || !defined(PIC)) +#if defined(COMPILE_MMX_IS_OKAY) { GIMP_COMPOSITE_MULTIPLY, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_multiply_rgba8_rgba8_rgba8_mmx }, { GIMP_COMPOSITE_SCREEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_screen_rgba8_rgba8_rgba8_mmx }, { GIMP_COMPOSITE_DIFFERENCE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_difference_rgba8_rgba8_rgba8_mmx }, diff --git a/app/composite/gimp-composite-mmx-test.c b/app/composite/gimp-composite-mmx-test.c index 3bf1e764d6..1b3537411a 100644 --- a/app/composite/gimp-composite-mmx-test.c +++ b/app/composite/gimp-composite-mmx-test.c @@ -19,7 +19,7 @@ int gimp_composite_mmx_test (int iterations, int n_pixels) { -#if (__GNUC__ >= 3) && defined(USE_MMX) && defined(ARCH_X86) && (defined(ARCH_X86_64) || !defined(PIC)) +#if defined(COMPILE_MMX_IS_OKAY) GimpCompositeContext generic_ctx; GimpCompositeContext special_ctx; double ft0; @@ -210,7 +210,7 @@ main (int argc, char *argv[]) putenv ("GIMP_COMPOSITE=0x1"); - iterations = 1; + iterations = 10; n_pixels = 1048593; argv++, argc--; diff --git a/app/composite/gimp-composite-mmx.c b/app/composite/gimp-composite-mmx.c index d67d805aa4..522779d810 100644 --- a/app/composite/gimp-composite-mmx.c +++ b/app/composite/gimp-composite-mmx.c @@ -62,19 +62,19 @@ debug_display_mmx(void) printf("--------------------------------------------\n"); } -static const guint32 rgba8_alpha_mask[2] = { 0xFF000000, 0xFF000000 }; -static const guint32 rgba8_b1[2] = { 0x01010101, 0x01010101 }; -static const guint32 rgba8_b255[2] = { 0xFFFFFFFF, 0xFFFFFFFF }; -static const guint32 rgba8_w1[2] = { 0x00010001, 0x00010001 }; -static const guint32 rgba8_w2[2] = { 0x00020002, 0x00020002 }; -static const guint32 rgba8_w128[2] = { 0x00800080, 0x00800080 }; -static const guint32 
rgba8_w256[2] = { 0x01000100, 0x01000100 }; -static const guint32 rgba8_w255[2] = { 0X00FF00FF, 0X00FF00FF }; +const guint32 rgba8_alpha_mask_64[2] = { 0xFF000000, 0xFF000000 }; +const guint32 rgba8_b1_64[2] = { 0x01010101, 0x01010101 }; +const guint32 rgba8_b255_64[2] = { 0xFFFFFFFF, 0xFFFFFFFF }; +const guint32 rgba8_w1_64[2] = { 0x00010001, 0x00010001 }; +const guint32 rgba8_w2_64[2] = { 0x00020002, 0x00020002 }; +const guint32 rgba8_w128_64[2] = { 0x00800080, 0x00800080 }; +const guint32 rgba8_w256_64[2] = { 0x01000100, 0x01000100 }; +const guint32 rgba8_w255_64[2] = { 0X00FF00FF, 0X00FF00FF }; -static const guint32 va8_alpha_mask[2] = { 0xFF00FF00, 0xFF00FF00 }; -static const guint32 va8_b255[2] = { 0xFFFFFFFF, 0xFFFFFFFF }; -static const guint32 va8_w1[2] = { 0x00010001, 0x00010001 }; -static const guint32 va8_w255[2] = { 0x00FF00FF, 0x00FF00FF }; +const guint32 va8_alpha_mask_64[2] = { 0xFF00FF00, 0xFF00FF00 }; +const guint32 va8_b255_64[2] = { 0xFFFFFFFF, 0xFFFFFFFF }; +const guint32 va8_w1_64[2] = { 0x00010001, 0x00010001 }; +const guint32 va8_w255_64[2] = { 0x00FF00FF, 0x00FF00FF }; /* * @@ -89,13 +89,13 @@ gimp_composite_addition_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) asm volatile ("movq %0,%%mm0" : /* empty */ - : "m" (*rgba8_alpha_mask) + : "m" (*rgba8_alpha_mask_64) : "%mm0"); for (; n_pixels >= 2; n_pixels -= 2) { - asm volatile (" movq %1, %%mm2\n" - "\tmovq %2, %%mm3\n" + asm volatile (" movq %1, %%mm2\n" + "\tmovq %2, %%mm3\n" "\tmovq %%mm2, %%mm4\n" "\tpaddusb %%mm3, %%mm4\n" "\tmovq %%mm0, %%mm1\n" @@ -179,7 +179,7 @@ gimp_composite_burn_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) "\t" pminub(mm0,mm1,mm3) "\n" /* mm1 = min(mm0,mm1) clobber mm3 */ - "\tmovq %6,%%mm7\n" /* mm6 = rgba8_alpha_mask */ + "\tmovq %6,%%mm7\n" /* mm6 = rgba8_alpha_mask_64 */ "\tpand %%mm7,%%mm1\n" /* mm1 = mm7 & alpha_mask */ "\tpandn %%mm5,%%mm7\n" /* mm7 = ~mm7 & mm5 */ @@ -187,7 +187,7 @@ gimp_composite_burn_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) "\tmovq %%mm7,%0\n" : "=m" (*d) - : "m" (*a), "m" (*b), "m" (*rgba8_b255), "m" (*rgba8_w1), "m" (*rgba8_w255), "m" (*rgba8_alpha_mask) + : "m" (*a), "m" (*b), "m" (*rgba8_b255_64), "m" (*rgba8_w1_64), "m" (*rgba8_w255_64), "m" (*rgba8_alpha_mask_64) : pdivwqX_clobber, "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"); d++; b++; @@ -241,7 +241,7 @@ gimp_composite_burn_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) "\tmovd %%mm7,%0\n" : "=m" (*d) - : "m" (*a), "m" (*b), "m" (*rgba8_b255), "m" (*rgba8_w1), "m" (*rgba8_w255), "m" (*rgba8_alpha_mask) + : "m" (*a), "m" (*b), "m" (*rgba8_b255_64), "m" (*rgba8_w1_64), "m" (*rgba8_w255_64), "m" (*rgba8_alpha_mask_64) : pdivwqX_clobber, "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"); } @@ -293,7 +293,7 @@ gimp_composite_difference_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) uint64 *b = (uint64 *) _op->B; gulong n_pixels = _op->n_pixels; - asm volatile ("movq %0,%%mm0" : : "m" (*rgba8_alpha_mask) : "%mm0"); + asm volatile ("movq %0,%%mm0" : : "m" (*rgba8_alpha_mask_64) : "%mm0"); for (; n_pixels >= 2; n_pixels -= 2) { @@ -318,7 +318,7 @@ gimp_composite_difference_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) d++; } - if (n_pixels) + if (n_pixels > 0) { asm volatile (" movd %1, %%mm2\n" "\tmovd %2, %%mm3\n" @@ -352,7 +352,7 @@ xxxgimp_composite_divide_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) asm volatile (" movq %0, %%mm0\n" "\tmovq %1, %%mm7\n" : - : "m" (*rgba8_alpha_mask), "m" (*rgba8_w1) + : "m" (*rgba8_alpha_mask_64), "m" 
(*rgba8_w1_64) : "%mm0", "%mm7"); for (; n_pixels >= 2; n_pixels -= 2) @@ -382,7 +382,7 @@ xxxgimp_composite_divide_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) "\tpackuswb %%mm4,%%mm5\n" /* expects mm4 and mm5 to be signed values */ "\t" pminub(mm0,mm1,mm3) "\n" - "\tmovq %3,%%mm3\n" + "\tmovq %3,%%mm3\n" "\tmovq %%mm3,%%mm2\n" "\tpandn %%mm5,%%mm3\n" @@ -392,14 +392,14 @@ xxxgimp_composite_divide_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) "\tmovq %%mm3,%0\n" : "=m" (*d) - : "m" (*a), "m" (*b), "m" (*rgba8_alpha_mask) - : "%eax", "%ecx", "%edx", "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); + : "m" (*a), "m" (*b), "m" (*rgba8_alpha_mask_64) + : pdivwuqX_clobber, "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); a++; b++; d++; } - if (n_pixels) + if (n_pixels > 0) { asm volatile (" movd %1,%%mm0\n" "\tmovd %2,%%mm1\n" @@ -436,8 +436,8 @@ xxxgimp_composite_divide_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) "\tmovd %%mm3,%0\n" : "=m" (*d) - : "m" (*a), "m" (*b), "m" (*rgba8_alpha_mask) - : "%eax", "%ecx", "%edx", "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); + : "m" (*a), "m" (*b), "m" (*rgba8_alpha_mask_64) + : pdivwuqX_clobber, "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); } asm("emms"); @@ -487,14 +487,14 @@ xxxgimp_composite_dodge_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) "\tmovq %%mm7,%0\n" : "=m" (*d) - : "m" (*a), "m" (*b), "m" (*rgba8_w256), "m" (*rgba8_alpha_mask) + : "m" (*a), "m" (*b), "m" (*rgba8_w256_64), "m" (*rgba8_alpha_mask_64) : pdivwuqX_clobber, "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"); a++; b++; d++; } - if (n_pixels) + if (n_pixels > 0) { asm volatile (" movd %0,%%mm0\n" "\tmovq %1,%%mm1\n" @@ -530,7 +530,7 @@ xxxgimp_composite_dodge_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) "\tmovd %%mm7,%2\n" : /* empty */ - : "m" (*a), "m" (*b), "m" (*d), "m" (*rgba8_w256), "m" (*rgba8_alpha_mask) + : "m" (*a), "m" (*b), "m" (*d), "m" (*rgba8_w256_64), "m" (*rgba8_alpha_mask_64) : pdivwuqX_clobber, "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); } @@ -540,16 +540,22 @@ xxxgimp_composite_dodge_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) void gimp_composite_grain_extract_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) { - GimpCompositeContext op = *_op; + uint64 *d = (uint64 *) _op->D; + uint64 *a = (uint64 *) _op->A; + uint64 *b = (uint64 *) _op->B; + gulong n_pixels = _op->n_pixels; - asm volatile ("movq %0,%%mm0" : : "m" (*rgba8_alpha_mask) : "%mm0"); - asm volatile ("pxor %%mm6,%%mm6" : : : "%mm6"); - asm volatile ("movq %0,%%mm7" : : "m" (*rgba8_w128) : "%mm7"); + asm volatile ("movq %0,%%mm0\n" + "pxor %%mm6,%%mm6\n" + "movq %1,%%mm7\n" + : /* no outputs */ + : "m" (*rgba8_alpha_mask_64), "m" (*rgba8_w128_64) + : "%mm0", "%mm7", "%mm6"); - for (; op.n_pixels >= 2; op.n_pixels -= 2) + for (; n_pixels >= 2; n_pixels -= 2) { - asm volatile (" movq %0,%%mm2\n" - "\tmovq %1,%%mm3\n" + asm volatile (" movq %1,%%mm2\n" + "\tmovq %2,%%mm3\n" mmx_low_bytes_to_words(mm2,mm4,mm6) mmx_low_bytes_to_words(mm3,mm5,mm6) "\tpsubw %%mm5,%%mm4\n" @@ -572,19 +578,19 @@ gimp_composite_grain_extract_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) "\tpand %%mm0,%%mm2\n" "\tpor %%mm2,%%mm1\n" - "\tmovq %%mm1,%2\n" - : /* empty */ - : "m" (*op.A), "m" (*op.B), "m" (*op.D) - : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4"); - op.A += 8; - op.B += 8; - op.D += 8; + "\tmovq %%mm1,%0\n" + : "=m" (*d) + : "m" (*a), "m" (*b) + : "%mm1", "%mm2", "%mm3", "%mm4"); + a++; + b++; + d++; } - if (op.n_pixels) + if (n_pixels 
> 0) { - asm volatile (" movd %0, %%mm2\n" - "\tmovd %1, %%mm3\n" + asm volatile (" movd %1, %%mm2\n" + "\tmovd %2, %%mm3\n" mmx_low_bytes_to_words(mm2,mm4,mm6) mmx_low_bytes_to_words(mm3,mm5,mm6) @@ -603,10 +609,10 @@ gimp_composite_grain_extract_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) "\tpand %%mm0, %%mm2\n" "\tpor %%mm2, %%mm1\n" - "\tmovd %%mm1, %2\n" - : /* empty */ - : "m" (*op.A), "m" (*op.B), "m" (*op.D) - : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4"); + "\tmovd %%mm1, %0\n" + : "=m" (*d) + : "m" (*a), "m" (*b) + : "%mm1", "%mm2", "%mm3", "%mm4"); } asm("emms"); @@ -615,19 +621,22 @@ gimp_composite_grain_extract_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) void gimp_composite_grain_merge_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) { - GimpCompositeContext op = *_op; + uint64 *d = (uint64 *) _op->D; + uint64 *a = (uint64 *) _op->A; + uint64 *b = (uint64 *) _op->B; + gulong n_pixels = _op->n_pixels; asm volatile ("movq %0, %%mm0\n" "pxor %%mm6, %%mm6\n" "movq %1, %%mm7\n" : /* empty */ - : "m" (*rgba8_alpha_mask), "m" (*rgba8_w128) + : "m" (*rgba8_alpha_mask_64), "m" (*rgba8_w128_64) : "%mm0", "%mm6", "%mm7"); - for (; op.n_pixels >= 2; op.n_pixels -= 2) + for (; n_pixels >= 2; n_pixels -= 2) { - asm volatile (" movq %0, %%mm2\n" - "\tmovq %1, %%mm3\n" + asm volatile (" movq %1, %%mm2\n" + "\tmovq %2, %%mm3\n" mmx_low_bytes_to_words(mm2,mm4,mm6) mmx_low_bytes_to_words(mm3,mm5,mm6) @@ -647,19 +656,19 @@ gimp_composite_grain_merge_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) "\tmovq %%mm0, %%mm1\n" "\tpandn %%mm4, %%mm1\n" "\tpor %%mm2, %%mm1\n" - "\tmovq %%mm1, %2\n" - : /* empty */ - : "m" (*op.A), "m" (*op.B), "m" (*op.D) - : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4"); - op.A += 8; - op.B += 8; - op.D += 8; + "\tmovq %%mm1, %0\n" + : "=m" (*d) + : "m" (*a), "m" (*b) + : "%mm1", "%mm2", "%mm3", "%mm4"); + a++; + b++; + d++; } - if (op.n_pixels) + if (n_pixels > 0) { - asm volatile (" movd %0, %%mm2\n" - "\tmovd %1, %%mm3\n" + asm volatile (" movd %1, %%mm2\n" + "\tmovd %2, %%mm3\n" mmx_low_bytes_to_words(mm2,mm4,mm6) mmx_low_bytes_to_words(mm3,mm5,mm6) @@ -677,10 +686,10 @@ gimp_composite_grain_merge_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) "\tpand %%mm0, %%mm2\n" "\tpor %%mm2, %%mm1\n" - "\tmovd %%mm1, %2\n" - : /* empty */ - : "m" (*op.A), "m" (*op.B), "m" (*op.D) - : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4"); + "\tmovd %%mm1, %0\n" + : "=m" (*d) + : "m" (*a), "m" (*b) + : "%mm1", "%mm2", "%mm3", "%mm4"); } asm("emms"); @@ -689,14 +698,17 @@ gimp_composite_grain_merge_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) void gimp_composite_lighten_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) { - GimpCompositeContext op = *_op; + uint64 *d = (uint64 *) _op->D; + uint64 *a = (uint64 *) _op->A; + uint64 *b = (uint64 *) _op->B; + gulong n_pixels = _op->n_pixels; - asm volatile ("movq %0,%%mm0" : : "m" (*rgba8_alpha_mask) : "%mm0"); + asm volatile ("movq %0,%%mm0" : : "m" (*rgba8_alpha_mask_64) : "%mm0"); - for (; op.n_pixels >= 2; op.n_pixels -= 2) + for (; n_pixels >= 2; n_pixels -= 2) { - asm volatile (" movq %0, %%mm2\n" - "\tmovq %1, %%mm3\n" + asm volatile (" movq %1, %%mm2\n" + "\tmovq %2, %%mm3\n" "\tmovq %%mm2, %%mm4\n" "\t" pmaxub(mm3,mm4,mm5) "\n" "\tmovq %%mm0, %%mm1\n" @@ -704,34 +716,34 @@ gimp_composite_lighten_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) "\t" pminub(mm2,mm3,mm4) "\n" "\tpand %%mm0, %%mm3\n" "\tpor %%mm3, %%mm1\n" - "\tmovq %%mm1, %2\n" - : /* empty */ - : "m" (*op.A), "m" (*op.B), "m" (*op.D) - : "0", 
"1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); - op.A += 8; - op.B += 8; - op.D += 8; + "\tmovq %%mm1, %0\n" + : "=m" (*d) + : "m" (*a), "m" (*b) + : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); + a++; + b++; + d++; } - if (op.n_pixels) + if (n_pixels > 0) { - asm volatile (" movd %0, %%mm2\n" - "\tmovd %1, %%mm3\n" - "\tmovq %%mm2, %%mm4\n" - "\t" pmaxub(mm3,mm4,mm5) "\n" + asm volatile (" movd %1, %%mm2\n" + "\tmovd %2, %%mm3\n" + "\tmovq %%mm2, %%mm4\n" + "\t" pmaxub(mm3,mm4,mm5) "\n" - "\tmovq %%mm0, %%mm1\n" - "\tpandn %%mm4, %%mm1\n" + "\tmovq %%mm0, %%mm1\n" + "\tpandn %%mm4, %%mm1\n" - "\t" pminub(mm2,mm3,mm4) "\n" + "\t" pminub(mm2,mm3,mm4) "\n" - "\tpand %%mm0, %%mm3\n" - "\tpor %%mm3, %%mm1\n" - "\tmovd %%mm1, %2\n" - : /* empty */ - : "m" (*op.A), "m" (*op.B), "m" (*op.D) - : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); - } + "\tpand %%mm0, %%mm3\n" + "\tpor %%mm3, %%mm1\n" + "\tmovd %%mm1, %0\n" + : "=m" (*d) + : "m" (*a), "m" (*b) + : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); + } asm("emms"); } @@ -739,16 +751,23 @@ gimp_composite_lighten_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) void gimp_composite_multiply_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) { - GimpCompositeContext op = *_op; + uint64 *d = (uint64 *) _op->D; + uint64 *a = (uint64 *) _op->A; + uint64 *b = (uint64 *) _op->B; + gulong n_pixels = _op->n_pixels; - asm volatile ("movq %0,%%mm0" : : "m" (*rgba8_alpha_mask) : "%mm0"); - asm volatile ("movq %0,%%mm7" : : "m" (*rgba8_w128) : "%mm7"); - asm volatile ("pxor %%mm6,%%mm6" : : : "%mm6"); + asm volatile ( + "movq %0,%%mm0\n" + "movq %1,%%mm7\n" + "pxor %%mm6,%%mm6\n" + : /* empty */ + : "m" (*rgba8_alpha_mask_64), "m" (*rgba8_w128_64) + : "%mm6", "%mm7", "%mm0"); - for (; op.n_pixels >= 2; op.n_pixels -= 2) + for (; n_pixels >= 2; n_pixels -= 2) { - asm volatile (" movq %0, %%mm2\n" - "\tmovq %1, %%mm3\n" + asm volatile (" movq %1, %%mm2\n" + "\tmovq %2, %%mm3\n" mmx_low_bytes_to_words(mm2,mm1,mm6) mmx_low_bytes_to_words(mm3,mm5,mm6) @@ -767,37 +786,37 @@ gimp_composite_multiply_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) "\tpand %%mm0, %%mm2\n" "\tpor %%mm2, %%mm1\n" - "\tmovq %%mm1, %2\n" - : /* empty */ - : "m" (*op.A), "m" (*op.B), "m" (*op.D) - : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); - op.A += 8; - op.B += 8; - op.D += 8; + "\tmovq %%mm1, %0\n" + : "=m" (*d) + : "m" (*a), "m" (*b) + : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); + a++; + b++; + d++; } - if (op.n_pixels) + if (n_pixels > 0) { - asm volatile (" movd %0, %%mm2\n" - "\tmovd %1, %%mm3\n" + asm volatile (" movd %1, %%mm2\n" + "\tmovd %2, %%mm3\n" - mmx_low_bytes_to_words(mm2,mm1,mm6) - mmx_low_bytes_to_words(mm3,mm5,mm6) - pmulwX(mm5,mm1,mm7) + mmx_low_bytes_to_words(mm2,mm1,mm6) + mmx_low_bytes_to_words(mm3,mm5,mm6) + pmulwX(mm5,mm1,mm7) - "\tpackuswb %%mm6, %%mm1\n" + "\tpackuswb %%mm6, %%mm1\n" - "\tmovq %%mm0, %%mm4\n" - "\tpandn %%mm1, %%mm4\n" - "\tmovq %%mm4, %%mm1\n" - "\t" pminub(mm3,mm2,mm4) "\n" - "\tpand %%mm0, %%mm2\n" - "\tpor %%mm2, %%mm1\n" + "\tmovq %%mm0, %%mm4\n" + "\tpandn %%mm1, %%mm4\n" + "\tmovq %%mm4, %%mm1\n" + "\t" pminub(mm3,mm2,mm4) "\n" + "\tpand %%mm0, %%mm2\n" + "\tpor %%mm2, %%mm1\n" - "\tmovd %%mm1, %2\n" - : /* empty */ - : "m" (*op.A), "m" (*op.B), "m" (*op.D) - : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); + "\tmovd %%mm1, %0\n" + : "=m" (*d) + : "m" (*a), "m" (*b) + : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); } asm("emms"); @@ -849,21 +868,24 @@ mmx_op_overlay(void) "\tpor %%mm3,%%mm1\n" : /* empty */ - : 
"m" (*rgba8_w2), "m" (*rgba8_alpha_mask) + : "m" (*rgba8_w2_64), "m" (*rgba8_alpha_mask_64) ); } void xxxgimp_composite_overlay_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) { - GimpCompositeContext op = *_op; + uint64 *d = (uint64 *) _op->D; + uint64 *a = (uint64 *) _op->A; + uint64 *b = (uint64 *) _op->B; + gulong n_pixels = _op->n_pixels; asm volatile ("pxor %%mm0,%%mm0\n" "movq %0,%%mm7" : /* empty */ - : "m" (*rgba8_w128) : "%mm0"); + : "m" (*rgba8_w128_64) : "%mm0"); - for (; op.n_pixels >= 2; op.n_pixels -= 2) + for (; n_pixels >= 2; n_pixels -= 2) { asm volatile (" movq %0,%%mm2\n" "\tmovq %1,%%mm3\n" @@ -910,25 +932,25 @@ xxxgimp_composite_overlay_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) "\tpor %%mm3,%%mm1\n" "\tmovq %%mm1,%2\n" - : "+m" (*op.A), "+m" (*op.B), "+m" (*op.D) - : "m" (*rgba8_w2), "m" (*rgba8_alpha_mask) - : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4"); - op.A += 8; - op.B += 8; - op.D += 8; + : "+m" (*a), "+m" (*b), "+m" (*d) + : "m" (*rgba8_w2_64), "m" (*rgba8_alpha_mask_64) + : "%mm1", "%mm2", "%mm3", "%mm4"); + a++; + b++; + d++; } - if (op.n_pixels) + if (n_pixels > 0) { - asm volatile (" movd %0,%%mm2\n" - "\tmovd %1,%%mm3\n" + asm volatile (" movd %1,%%mm2\n" + "\tmovd %2,%%mm3\n" /* low bytes */ mmx_low_bytes_to_words(mm3,mm5,mm0) "\tpcmpeqb %%mm4,%%mm4\n" "\tpsubb %%mm2,%%mm4\n" /* mm4 = 255 - A */ "\tpunpcklbw %%mm0,%%mm4\n" /* mm4 = (low bytes as word) mm4 */ - "\tmovq %3,%%mm6\n" /* mm6 = words of value 2 */ + "\tmovq %3,%%mm6\n" /* mm6 = words of integer value 2 */ "\tpmullw %%mm5,%%mm6\n" /* mm6 = 2 * low bytes of B */ mmx_int_mult(mm6,mm4,mm7) /* mm4 = INT_MULT(mm6, mm4) */ @@ -937,7 +959,7 @@ xxxgimp_composite_overlay_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) "\tpcmpeqb %%mm1,%%mm1\n" "\tpsubb %%mm2,%%mm1\n" /* mm1 = 255 - A */ "\tpunpckhbw %%mm0,%%mm1\n" /* mm1 = (high bytes as word) mm1 */ - "\tmovq %3,%%mm6\n" /* mm6 = words of value 2 */ + "\tmovq %3,%%mm6\n" /* mm6 = words of integer value 2 */ "\tpmullw %%mm5,%%mm6\n" /* mm6 = 2 * high bytes of B */ mmx_int_mult(mm6,mm1,mm7) /* mm1 = INT_MULT(mm6, mm1) */ @@ -964,10 +986,10 @@ xxxgimp_composite_overlay_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) "\tpor %%mm3,%%mm1\n" - "\tmovd %%mm1,%2\n" - : /* empty */ - : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_w2), "m" (*rgba8_alpha_mask) - : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4"); + "\tmovd %%mm1,%0\n" + : "=m" (*d) + : "m" (*a), "m" (*b), "m" (*rgba8_w2_64), "m" (*rgba8_alpha_mask_64) + : "%mm1", "%mm2", "%mm3", "%mm4"); } asm("emms"); @@ -977,23 +999,25 @@ xxxgimp_composite_overlay_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) void gimp_composite_scale_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) { - GimpCompositeContext op = *_op; + uint64 *d = (uint64 *) _op->D; + uint64 *a = (uint64 *) _op->A; + gulong n_pixels = _op->n_pixels; asm volatile ("pxor %%mm0,%%mm0\n" - "\tmovl %0,%%eax\n" + "\tmovl %0,%%eax\n" "\tmovl %%eax,%%ebx\n" - "\tshl $16,%%ebx\n" + "\tshl $16,%%ebx\n" "\torl %%ebx,%%eax\n" "\tmovd %%eax,%%mm5\n" "\tmovd %%eax,%%mm3\n" - "\tpsllq $32,%%mm5\n" + "\tpsllq $32,%%mm5\n" "\tpor %%mm5,%%mm3\n" - "\tmovq %1,%%mm7\n" + "\tmovq %1,%%mm7\n" : /* empty */ - : "m" (op.scale.scale), "m" (*rgba8_w128) - : "%eax", "%mm0", "%mm5", "%mm6", "%mm7"); + : "m" (_op->scale.scale), "m" (*rgba8_w128_64) + : "%eax", "%ebx", "%mm0", "%mm5", "%mm6", "%mm7"); - for (; op.n_pixels >= 2; op.n_pixels -= 2) + for (; n_pixels >= 2; n_pixels -= 2) { asm volatile ("movq %1,%%mm2\n" "\tmovq %%mm2,%%mm1\n" @@ 
-1010,15 +1034,15 @@ gimp_composite_scale_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) "\tpackuswb %%mm4,%%mm1\n" - "\tmovq %%mm1,%0\n" - : "=m" (*op.D) - : "m" (*op.A) + "\tmovq %%mm1,%0\n" + : "=m" (*d) + : "m" (*a) : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"); - op.A += 8; - op.D += 8; + a++; + d++; } - if (op.n_pixels) + if (n_pixels > 0) { asm volatile ("movd %1,%%mm2\n" "\tmovq %%mm2,%%mm1\n" @@ -1029,8 +1053,8 @@ gimp_composite_scale_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) "\tpackuswb %%mm0,%%mm1\n" "\tmovd %%mm1,%0\n" - : "=m" (*op.D) - : "m" (*op.A) + : "=m" (*d) + : "m" (*a) : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"); } @@ -1040,16 +1064,22 @@ gimp_composite_scale_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) void gimp_composite_screen_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) { - GimpCompositeContext op = *_op; + uint64 *d = (uint64 *) _op->D; + uint64 *a = (uint64 *) _op->A; + uint64 *b = (uint64 *) _op->B; + gulong n_pixels = _op->n_pixels; - asm volatile ("movq %0,%%mm0" : : "m" (*rgba8_alpha_mask) : "%mm0"); - asm volatile ("movq %0,%%mm7" : : "m" (*rgba8_w128) : "%mm7"); - asm volatile ("pxor %mm6, %mm6"); + asm volatile ("pxor %%mm6,%%mm6\n" + "movq %0,%%mm0\n" + "movq %1,%%mm7\n" + : /* empty */ + : "m" (*rgba8_alpha_mask_64), "m" (*rgba8_w128_64) + : "%mm0", "%mm6", "%mm7"); - for (; op.n_pixels >= 2; op.n_pixels -= 2) + for (; n_pixels >= 2; n_pixels -= 2) { - asm volatile (" movq %0,%%mm2\n" - "\tmovq %1,%%mm3\n" + asm volatile (" movq %1,%%mm2\n" + "\tmovq %2,%%mm3\n" "\tpcmpeqb %%mm4,%%mm4\n" "\tpsubb %%mm2,%%mm4\n" @@ -1092,79 +1122,82 @@ gimp_composite_screen_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) "\tpor %%mm3,%%mm1\n" - "\tmovq %%mm1,%2\n" - : /* empty */ - : "m" (*op.A), "m" (*op.B), "m" (*op.D) - : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); - op.A += 8; - op.B += 8; - op.D += 8; + "\tmovq %%mm1,%0\n" + : "=m" (*d) + : "m" (*a), "m" (*b) + : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); + a++; + b++; + d++; } - if (op.n_pixels) + if (n_pixels > 0) { - asm volatile (" movd %0,%%mm2\n" - "\tmovd %1,%%mm3\n" + asm volatile (" movd %1,%%mm2\n" + "\tmovd %2,%%mm3\n" - "\tpcmpeqb %%mm4,%%mm4\n" - "\tpsubb %%mm2,%%mm4\n" - "\tpcmpeqb %%mm5,%%mm5\n" - "\tpsubb %%mm3,%%mm5\n" + "\tpcmpeqb %%mm4,%%mm4\n" + "\tpsubb %%mm2,%%mm4\n" + "\tpcmpeqb %%mm5,%%mm5\n" + "\tpsubb %%mm3,%%mm5\n" - "\tpunpcklbw %%mm6,%%mm4\n" - "\tpunpcklbw %%mm6,%%mm5\n" - "\tpmullw %%mm4,%%mm5\n" - "\tpaddw %%mm7,%%mm5\n" - "\tmovq %%mm5,%%mm1\n" - "\tpsrlw $ 8,%%mm1\n" - "\tpaddw %%mm5,%%mm1\n" - "\tpsrlw $ 8,%%mm1\n" + "\tpunpcklbw %%mm6,%%mm4\n" + "\tpunpcklbw %%mm6,%%mm5\n" + "\tpmullw %%mm4,%%mm5\n" + "\tpaddw %%mm7,%%mm5\n" + "\tmovq %%mm5,%%mm1\n" + "\tpsrlw $ 8,%%mm1\n" + "\tpaddw %%mm5,%%mm1\n" + "\tpsrlw $ 8,%%mm1\n" - "\tpcmpeqb %%mm4,%%mm4\n" - "\tpsubb %%mm2,%%mm4\n" - "\tpcmpeqb %%mm5,%%mm5\n" - "\tpsubb %%mm3,%%mm5\n" + "\tpcmpeqb %%mm4,%%mm4\n" + "\tpsubb %%mm2,%%mm4\n" + "\tpcmpeqb %%mm5,%%mm5\n" + "\tpsubb %%mm3,%%mm5\n" - "\tpunpckhbw %%mm6,%%mm4\n" - "\tpunpckhbw %%mm6,%%mm5\n" - "\tpmullw %%mm4,%%mm5\n" - "\tpaddw %%mm7,%%mm5\n" - "\tmovq %%mm5,%%mm4\n" - "\tpsrlw $ 8,%%mm4\n" - "\tpaddw %%mm5,%%mm4\n" - "\tpsrlw $ 8,%%mm4\n" + "\tpunpckhbw %%mm6,%%mm4\n" + "\tpunpckhbw %%mm6,%%mm5\n" + "\tpmullw %%mm4,%%mm5\n" + "\tpaddw %%mm7,%%mm5\n" + "\tmovq %%mm5,%%mm4\n" + "\tpsrlw $ 8,%%mm4\n" + "\tpaddw %%mm5,%%mm4\n" + "\tpsrlw $ 8,%%mm4\n" - "\tpackuswb %%mm4,%%mm1\n" + "\tpackuswb %%mm4,%%mm1\n" - "\tpcmpeqb 
%%mm4,%%mm4\n" - "\tpsubb %%mm1,%%mm4\n" + "\tpcmpeqb %%mm4,%%mm4\n" + "\tpsubb %%mm1,%%mm4\n" - "\tmovq %%mm0,%%mm1\n" - "\tpandn %%mm4,%%mm1\n" + "\tmovq %%mm0,%%mm1\n" + "\tpandn %%mm4,%%mm1\n" - "\t" pminub(mm2,mm3,mm4) "\n" - "\tpand %%mm0,%%mm3\n" + "\t" pminub(mm2,mm3,mm4) "\n" + "\tpand %%mm0,%%mm3\n" - "\tpor %%mm3,%%mm1\n" + "\tpor %%mm3,%%mm1\n" + + "\tmovd %%mm1,%0\n" + : "=m" (*d) + : "m" (*a), "m" (*b) + : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); + } - "\tmovd %%mm1,%2\n" - : /* empty */ - : "m" (*op.A), "m" (*op.B), "m" (*op.D) - : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); - } - - asm("emms"); + asm volatile ("emms"); } void gimp_composite_subtract_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) { - GimpCompositeContext op = *_op; + uint64 *d = (uint64 *) _op->D; + uint64 *a = (uint64 *) _op->A; + uint64 *b = (uint64 *) _op->B; + gulong n_pixels = _op->n_pixels; - asm volatile ("movq %0,%%mm0" : : "m" (*rgba8_alpha_mask) : "%mm0"); + asm volatile ("movq %0,%%mm0" : : "m" (*rgba8_alpha_mask_64) : "%mm0"); - for (; op.n_pixels >= 2; op.n_pixels -= 2) + for (; n_pixels >= 2; n_pixels -= 2) { asm volatile (" movq %1,%%mm2\n" "\tmovq %2,%%mm3\n" @@ -1180,65 +1213,68 @@ gimp_composite_subtract_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) "\tpand %%mm0,%%mm2\n" "\tpor %%mm2,%%mm1\n" "\tmovq %%mm1,%0\n" - : "=m" (*op.D) - : "m" (*op.A), "m" (*op.B) - : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); - op.A += 8; - op.B += 8; - op.D += 8; + : "=m" (*d) + : "m" (*a), "m" (*b) + : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); + a++; + b++; + d++; } - if (op.n_pixels) + if (n_pixels > 0) { - asm volatile (" movd %0,%%mm2\n" - "\tmovd %1,%%mm3\n" + asm volatile (" movd %1,%%mm2\n" + "\tmovd %2,%%mm3\n" - "\tmovq %%mm2,%%mm4\n" - "\tpsubusb %%mm3,%%mm4\n" + "\tmovq %%mm2,%%mm4\n" + "\tpsubusb %%mm3,%%mm4\n" - "\tmovq %%mm0,%%mm1\n" - "\tpandn %%mm4,%%mm1\n" + "\tmovq %%mm0,%%mm1\n" + "\tpandn %%mm4,%%mm1\n" - "\t" pminub(mm3,mm2,mm4) "\n" + "\t" pminub(mm3,mm2,mm4) "\n" - "\tpand %%mm0,%%mm2\n" - "\tpor %%mm2,%%mm1\n" - "\tmovd %%mm1,%2\n" - : /* empty */ - : "m" (*op.A), "m" (*op.B), "m" (*op.D) - : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); - } + "\tpand %%mm0,%%mm2\n" + "\tpor %%mm2,%%mm1\n" + "\tmovd %%mm1,%0\n" + : "=m" (*d) + : "m" (*a), "m" (*b) + : "%mm1", "%mm2", "%mm3", "%mm4", "%mm5"); + } - asm("emms"); + asm volatile ("emms"); } void gimp_composite_swap_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op) { - GimpCompositeContext op = *_op; + uint64 *d = (uint64 *) _op->D; + uint64 *a = (uint64 *) _op->A; + uint64 *b = (uint64 *) _op->B; + gulong n_pixels = _op->n_pixels; - for (; op.n_pixels >= 2; op.n_pixels -= 2) + for (; n_pixels >= 2; n_pixels -= 2) { asm volatile (" movq %0,%%mm2\n" "\tmovq %1,%%mm3\n" "\tmovq %%mm3,%0\n" "\tmovq %%mm2,%1\n" - : /* empty */ - : "m" (*op.A), "m" (*op.B) - : "0", "1", "%mm1", "%mm2", "%mm3", "%mm4"); - op.A += 8; - op.B += 8; + : "+m" (*a), "+m" (*b) + : + : "%mm1", "%mm2", "%mm3", "%mm4"); + a++; + b++; } - if (op.n_pixels) + if (n_pixels > 0) { asm volatile (" movd %0,%%mm2\n" "\tmovd %1,%%mm3\n" "\tmovd %%mm3,%0\n" "\tmovd %%mm2,%1\n" - : /* empty */ - : "m" (*op.A), "m" (*op.B) - : "0", "1", "%mm1", "%mm2", "%mm3", "%mm4"); + : "+m" (*a), "+m" (*b) + : + : "%mm1", "%mm2", "%mm3", "%mm4"); } asm("emms"); @@ -1388,7 +1424,7 @@ gimp_composite_burn_va8_va8_va8_mmx (GimpCompositeContext *_op) "\tmovq %%mm7,%2\n" : /* empty */ - : "+m" (*op.A), "+m" (*op.B), "+m" (*op.D), "m" (*va8_b255), "m" (*va8_w1), "m" 
(*va8_w255), "m" (*va8_alpha_mask) + : "+m" (*op.A), "+m" (*op.B), "+m" (*op.D), "m" (*va8_b255), "m" (*va8_w1), "m" (*va8_w255_64), "m" (*va8_alpha_mask) : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4"); op.A += 8; op.B += 8; @@ -1441,7 +1477,7 @@ gimp_composite_burn_va8_va8_va8_mmx (GimpCompositeContext *_op) "\tmovd %%mm7,%2\n" : /* empty */ - : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*va8_b255), "m" (*va8_w1), "m" (*va8_w255), "m" (*va8_alpha_mask) + : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*va8_b255), "m" (*va8_w1), "m" (*va8_w255_64), "m" (*va8_alpha_mask) : "0", "1", "2", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"); } diff --git a/app/composite/gimp-composite-sse-installer.c b/app/composite/gimp-composite-sse-installer.c index f7cc0b7635..a980b1f3c2 100644 --- a/app/composite/gimp-composite-sse-installer.c +++ b/app/composite/gimp-composite-sse-installer.c @@ -16,7 +16,7 @@ static struct install_table { GimpPixelFormat D; void (*function)(GimpCompositeContext *); } _gimp_composite_sse[] = { -#if (__GNUC__ >= 3) && defined(USE_SSE) && defined(ARCH_X86) && (defined(ARCH_X86_64) || !defined(PIC)) +#if defined(COMPILE_SSE_IS_OKAY) { GIMP_COMPOSITE_MULTIPLY, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_multiply_rgba8_rgba8_rgba8_sse }, { GIMP_COMPOSITE_SCREEN, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_screen_rgba8_rgba8_rgba8_sse }, { GIMP_COMPOSITE_DIFFERENCE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_difference_rgba8_rgba8_rgba8_sse }, diff --git a/app/composite/gimp-composite-sse-test.c b/app/composite/gimp-composite-sse-test.c index e800bfb8cb..4278c188a0 100644 --- a/app/composite/gimp-composite-sse-test.c +++ b/app/composite/gimp-composite-sse-test.c @@ -19,7 +19,7 @@ int gimp_composite_sse_test (int iterations, int n_pixels) { -#if (__GNUC__ >= 3) && defined(USE_SSE) && defined(ARCH_X86) && (defined(ARCH_X86_64) || !defined(PIC)) +#if defined(COMPILE_SSE_IS_OKAY) GimpCompositeContext generic_ctx; GimpCompositeContext special_ctx; double ft0; @@ -210,7 +210,7 @@ main (int argc, char *argv[]) putenv ("GIMP_COMPOSITE=0x1"); - iterations = 1; + iterations = 10; n_pixels = 1048593; argv++, argc--; diff --git a/app/composite/gimp-composite-sse.c b/app/composite/gimp-composite-sse.c index 50c4e4da23..641b8d64f5 100644 --- a/app/composite/gimp-composite-sse.c +++ b/app/composite/gimp-composite-sse.c @@ -48,6 +48,7 @@ #define pminub(src,dst,tmp) "pminub " "%%" #src ", %%" #dst #define pmaxub(src,dst,tmp) "pmaxub " "%%" #src ", %%" #dst +#if 0 /* * Double-word divide. Adjusted for subsequent unsigned packing * (high-order bit of each word is cleared) @@ -65,7 +66,8 @@ "roll $16, %%eax; " \ "btr $15, %%eax; " \ "movd %%eax,%%" #quotient ";" - +#endif +#if 0 /* * Quadword divide. 
No adjustment for subsequent unsigned packing * (high-order bit of each word is left alone) @@ -107,7 +109,8 @@ "movd %%eax,%%" #divisor ";" \ "psllq $32,%%" #divisor ";" \ "por %%" #divisor ",%%" #quotient ";" - +#endif +#if 0 /* equivalent to the INT_MULT() macro in gimp-composite-generic.c */ /* * opr2 = INT_MULT(opr1, opr2, t) @@ -126,7 +129,8 @@ "\tpsrlw $8, %%"#opr2"; " \ "\tpaddw %%"#opr1", %%"#opr2"; " \ "\tpsrlw $8, %%"#opr2"\n" - +#endif +#if 0 /* a = INT_MULT(a,b) */ #define mmx_int_mult(a,b,w128) \ "\tpmullw %%"#b", %%"#a"; " \ @@ -135,7 +139,9 @@ "\tpsrlw $8, %%"#b"; " \ "\tpaddw %%"#a", %%"#b"; " \ "\tpsrlw $8, %%"#b"\n" +#endif +#if 0 static const guint32 rgba8_alpha_mask_64[2] = { 0xFF000000, 0xFF000000 }; static const guint32 rgba8_b1_64[2] = { 0x01010101, 0x01010101 }; static const guint32 rgba8_b255_64[2] = { 0xFFFFFFFF, 0xFFFFFFFF }; @@ -149,6 +155,7 @@ static const guint32 va8_alpha_mask[2] = { 0xFF00FF00, 0xFF00FF00 }; static const guint32 va8_b255[2] = { 0xFFFFFFFF, 0xFFFFFFFF }; static const guint32 va8_w1[2] = { 0x00010001, 0x00010001 }; static const guint32 va8_w255[2] = { 0x00FF00FF, 0x00FF00FF }; +#endif /* * @@ -156,48 +163,51 @@ static const guint32 va8_w255[2] = { 0x00FF00FF, 0x00FF00FF }; void gimp_composite_addition_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op) { - GimpCompositeContext op = *_op; + uint64 *d = (uint64 *) _op->D; + uint64 *a = (uint64 *) _op->A; + uint64 *b = (uint64 *) _op->B; + gulong n_pixels = _op->n_pixels; asm volatile ("movq %0,%%mm0" : /* empty */ : "m" (*rgba8_alpha_mask_64) : "%mm0"); - for (; op.n_pixels >= 2; op.n_pixels -= 2) + for (; n_pixels >= 2; n_pixels -= 2) { - asm (" movq %1, %%mm2\n" - "\tmovq %2, %%mm3\n" - "\tmovq %%mm2, %%mm4\n" - "\tpaddusb %%mm3, %%mm4\n" - "\tmovq %%mm0, %%mm1\n" - "\tpandn %%mm4, %%mm1\n" - "\tpminub %%mm3, %%mm2\n" - "\tpand %%mm0, %%mm2\n" - "\tpor %%mm2, %%mm1\n" - "\tmovq %%mm1, %0\n" - : "=m" (*op.D) - : "m" (*op.A), "m" (*op.B) - : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"); - op.A += 8; - op.B += 8; - op.D += 8; - } - - if (op.n_pixels) - { - asm volatile (" movd %1, %%mm2\n" - "\tmovd %2, %%mm3\n" + asm volatile (" movq %1, %%mm2\n" + "\tmovq %2, %%mm3\n" "\tmovq %%mm2, %%mm4\n" "\tpaddusb %%mm3, %%mm4\n" "\tmovq %%mm0, %%mm1\n" "\tpandn %%mm4, %%mm1\n" - "\tpminub %%mm3, %%mm2\n" + "\t" pminub(mm3, mm2, mm4) "\n" + "\tpand %%mm0, %%mm2\n" + "\tpor %%mm2, %%mm1\n" + "\tmovq %%mm1, %0\n" + : "=m" (*d) + : "m" (*a), "m" (*b) + : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4"); + a++; + b++; + d++; + } + + if (n_pixels > 0) + { + asm volatile (" movd %1, %%mm2\n" + "\tmovd %2, %%mm3\n" + "\tmovq %%mm2, %%mm4\n" + "\tpaddusb %%mm3, %%mm4\n" + "\tmovq %%mm0, %%mm1\n" + "\tpandn %%mm4, %%mm1\n" + "\t" pminub(mm3, mm2, mm4) "\n" "\tpand %%mm0, %%mm2\n" "\tpor %%mm2, %%mm1\n" "\tmovd %%mm1, %0\n" - : "=m" (*op.D) - : "m" (*op.A), "m" (*op.B) - : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"); + : "=m" (*d) + : "m" (*a), "m" (*b) + : "%mm0", "%mm1", "%mm2", "%mm3", "%mm4"); } asm("emms"); @@ -207,63 +217,66 @@ gimp_composite_addition_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op) void gimp_composite_burn_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op) { - GimpCompositeContext op = *_op; + uint64 *d = (uint64 *) _op->D; + uint64 *a = (uint64 *) _op->A; + uint64 *b = (uint64 *) _op->B; + gulong n_pixels = _op->n_pixels; - for (; op.n_pixels >= 2; op.n_pixels -= 2) + for (; n_pixels >= 2; n_pixels -= 2) { - asm (" movq %1,%%mm0\n" - "\tmovq %2,%%mm1\n" + asm 
volatile (" movq %1,%%mm0\n" + "\tmovq %2,%%mm1\n" - "\tmovq %3,%%mm2\n" - "\tpsubb %%mm0,%%mm2\n" /* mm2 = 255 - A */ - "\tpxor %%mm4,%%mm4\n" - "\tpunpcklbw %%mm2,%%mm4\n" /* mm4 = (255- A) * 256 */ + "\tmovq %3,%%mm2\n" + "\tpsubb %%mm0,%%mm2\n" /* mm2 = 255 - A */ + "\tpxor %%mm4,%%mm4\n" + "\tpunpcklbw %%mm2,%%mm4\n" /* mm4 = (255- A) * 256 */ - "\tmovq %%mm1,%%mm3\n" - "\tpxor %%mm5,%%mm5\n" - "\tpunpcklbw %%mm5,%%mm3\n" - "\tmovq %4,%%mm5\n" - "\tpaddusw %%mm3,%%mm5\n" /* mm5 = B + 1 */ + "\tmovq %%mm1,%%mm3\n" + "\tpxor %%mm5,%%mm5\n" + "\tpunpcklbw %%mm5,%%mm3\n" + "\tmovq %4,%%mm5\n" + "\tpaddusw %%mm3,%%mm5\n" /* mm5 = B + 1 */ - "\t" pdivwqX(mm4,mm5,mm7) "\n" + "\t" pdivwqX(mm4,mm5,mm7) "\n" - "\tmovq %3,%%mm2\n" - "\tpsubb %%mm0,%%mm2\n" /* mm2 = 255 - A */ - "\tpxor %%mm4,%%mm4\n" - "\tpunpckhbw %%mm2,%%mm4\n" /* mm4 = (255- A) * 256 */ + "\tmovq %3,%%mm2\n" + "\tpsubb %%mm0,%%mm2\n" /* mm2 = 255 - A */ + "\tpxor %%mm4,%%mm4\n" + "\tpunpckhbw %%mm2,%%mm4\n" /* mm4 = (255- A) * 256 */ - "\tmovq %%mm1,%%mm3\n" - "\tpxor %%mm5,%%mm5\n" - "\tpunpckhbw %%mm5,%%mm3\n" - "\tmovq %4,%%mm5\n" - "\tpaddusw %%mm3,%%mm5\n" /* mm5 = B + 1 */ - "\t" pdivwqX(mm4,mm5,mm6) "\n" + "\tmovq %%mm1,%%mm3\n" + "\tpxor %%mm5,%%mm5\n" + "\tpunpckhbw %%mm5,%%mm3\n" + "\tmovq %4,%%mm5\n" + "\tpaddusw %%mm3,%%mm5\n" /* mm5 = B + 1 */ + "\t" pdivwqX(mm4,mm5,mm6) "\n" - "\tmovq %5,%%mm4\n" - "\tmovq %%mm4,%%mm5\n" - "\tpsubusw %%mm6,%%mm4\n" - "\tpsubusw %%mm7,%%mm5\n" + "\tmovq %5,%%mm4\n" + "\tmovq %%mm4,%%mm5\n" + "\tpsubusw %%mm6,%%mm4\n" + "\tpsubusw %%mm7,%%mm5\n" - "\tpackuswb %%mm4,%%mm5\n" + "\tpackuswb %%mm4,%%mm5\n" - "\tpminub %%mm0,%%mm1\n" /* mm1 = min(mm0,mm1) clobber mm3 */ + "\t" pminub(mm0,mm1,mm3) "\n" /* mm1 = min(mm0,mm1) clobber mm3 */ - "\tmovq %6,%%mm7\n" - "\tpand %%mm7,%%mm1\n" /* mm1 = mm7 & alpha_mask */ + "\tmovq %6,%%mm7\n" /* mm6 = rgba8_alpha_mask_64 */ + "\tpand %%mm7,%%mm1\n" /* mm1 = mm7 & alpha_mask */ - "\tpandn %%mm5,%%mm7\n" /* mm7 = ~mm7 & mm5 */ - "\tpor %%mm1,%%mm7\n" /* mm7 = mm7 | mm1 */ - - "\tmovq %%mm7,%0\n" - : "=m" (*op.D) - : "m" (*op.A), "m" (*op.B), "m" (*rgba8_b255_64), "m" (*rgba8_w1_64), "m" (*rgba8_w255_64), "m" (*rgba8_alpha_mask_64) - : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"); - op.A += 8; - op.B += 8; - op.D += 8; + "\tpandn %%mm5,%%mm7\n" /* mm7 = ~mm7 & mm5 */ + "\tpor %%mm1,%%mm7\n" /* mm7 = mm7 | mm1 */ + + "\tmovq %%mm7,%0\n" + : "=m" (*d) + : "m" (*a), "m" (*b), "m" (*rgba8_b255_64), "m" (*rgba8_w1_64), "m" (*rgba8_w255_64), "m" (*rgba8_alpha_mask_64) + : pdivwqX_clobber, "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"); + d++; + b++; + a++; } - if (op.n_pixels) + if (n_pixels > 0) { asm volatile (" movd %1,%%mm0\n" "\tmovd %2,%%mm1\n" @@ -300,7 +313,7 @@ gimp_composite_burn_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op) "\tpackuswb %%mm4,%%mm5\n" - "\tpminub %%mm0,%%mm1\n" /* mm1 = min(mm0,mm1) clobber mm3 */ + "\t" pminub(mm0,mm1,mm3) "\n" /* mm1 = min(mm0,mm1) clobber mm3 */ "\tmovq %6,%%mm7\n" "\tpand %%mm7,%%mm1\n" /* mm1 = mm7 & alpha_mask */ @@ -309,9 +322,9 @@ gimp_composite_burn_rgba8_rgba8_rgba8_sse (GimpCompositeContext *_op) "\tpor %%mm1,%%mm7\n" /* mm7 = mm7 | mm1 */ "\tmovd %%mm7,%0\n" - : "=m" (*op.D) - : "m" (*op.A), "m" (*op.B), "m" (*rgba8_b255_64), "m" (*rgba8_w1_64), "m" (*rgba8_w255_64), "m" (*rgba8_alpha_mask_64) - : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"); + : "=m" (*d) + : "m" (*a), "m" (*b), "m" (*rgba8_b255_64), "m" (*rgba8_w1_64), "m" 
(*rgba8_w255_64), "m" (*rgba8_alpha_mask_64) + : pdivwqX_clobber, "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7"); } asm("emms"); diff --git a/app/composite/gimp-composite-sse2-installer.c b/app/composite/gimp-composite-sse2-installer.c index d4db80e6e3..8f56d3c920 100644 --- a/app/composite/gimp-composite-sse2-installer.c +++ b/app/composite/gimp-composite-sse2-installer.c @@ -16,7 +16,7 @@ static struct install_table { GimpPixelFormat D; void (*function)(GimpCompositeContext *); } _gimp_composite_sse2[] = { -#if (__GNUC__ >= 3) && defined(USE_SSE) && defined(ARCH_X86) && (defined(ARCH_X86_64) || !defined(PIC)) +#if defined(COMPILE_SSE2_IS_OKAY) { GIMP_COMPOSITE_DIFFERENCE, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_difference_rgba8_rgba8_rgba8_sse2 }, { GIMP_COMPOSITE_ADDITION, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_addition_rgba8_rgba8_rgba8_sse2 }, { GIMP_COMPOSITE_SUBTRACT, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, GIMP_PIXELFORMAT_RGBA8, gimp_composite_subtract_rgba8_rgba8_rgba8_sse2 }, diff --git a/app/composite/gimp-composite-sse2-test.c b/app/composite/gimp-composite-sse2-test.c index aca70d0244..61ce9f7466 100644 --- a/app/composite/gimp-composite-sse2-test.c +++ b/app/composite/gimp-composite-sse2-test.c @@ -19,7 +19,7 @@ int gimp_composite_sse2_test (int iterations, int n_pixels) { -#if (__GNUC__ >= 3) && defined(USE_SSE) && defined(ARCH_X86) && (defined(ARCH_X86_64) || !defined(PIC)) +#if defined(COMPILE_SSE2_IS_OKAY) GimpCompositeContext generic_ctx; GimpCompositeContext special_ctx; double ft0; @@ -155,7 +155,7 @@ main (int argc, char *argv[]) putenv ("GIMP_COMPOSITE=0x1"); - iterations = 1; + iterations = 10; n_pixels = 1048593; argv++, argc--; diff --git a/app/composite/gimp-composite-vis-test.c b/app/composite/gimp-composite-vis-test.c index ef76f7a00b..653b3b1fe2 100644 --- a/app/composite/gimp-composite-vis-test.c +++ b/app/composite/gimp-composite-vis-test.c @@ -19,7 +19,7 @@ int gimp_composite_vis_test (int iterations, int n_pixels) { -#if (__GNUC__ >= 3) && defined(USE_VIS) && defined(ARCH_SPARC) +#if defined(COMPILE_VIS_IS_OKAY) GimpCompositeContext generic_ctx; GimpCompositeContext special_ctx; double ft0; @@ -78,7 +78,7 @@ main (int argc, char *argv[]) putenv ("GIMP_COMPOSITE=0x1"); - iterations = 1; + iterations = 10; n_pixels = 1048593; argv++, argc--; diff --git a/app/composite/gimp-composite-vis.c b/app/composite/gimp-composite-vis.c index 6021cb591a..8c75da2c56 100644 --- a/app/composite/gimp-composite-vis.c +++ b/app/composite/gimp-composite-vis.c @@ -32,18 +32,14 @@ #include "gimp-composite.h" #include "gimp-composite-vis.h" -#if defined(USE_VIS) -#if defined(ARCH_SPARC) -#if __GNUC__ >= 3 +#ifdef COMPILE_VIS_IS_OKAY -#endif /* __GNUC__ > 3 */ -#endif /* defined(ARCH_SPARC) */ -#endif /* defined(USE_VIS) */ +#endif gboolean gimp_composite_vis_init (void) { -#ifdef ARCH_SPARC +#ifdef COMPILE_VIS_IS_OKAY return (TRUE); #else return (FALSE); diff --git a/app/composite/gimp-composite-vis.h b/app/composite/gimp-composite-vis.h index 46a81ca30c..d3ed74c3f1 100644 --- a/app/composite/gimp-composite-vis.h +++ b/app/composite/gimp-composite-vis.h @@ -9,4 +9,12 @@ extern gboolean gimp_composite_vis_init (void); */ extern gboolean gimp_composite_vis_install (void); +#if defined(USE_VIS) +#if defined(ARCH_SPARC) +#if __GNUC__ >= 3 +#define COMPILE_VIS_IS_OKAY (1) +#endif /* __GNUC__ > 3 */ +#endif /* defined(ARCH_SPARC) */ +#endif /* defined(USE_VIS) */ + 
#endif diff --git a/app/composite/gimp-composite-x86.h b/app/composite/gimp-composite-x86.h index aa22be575f..46c6b8f1e2 100644 --- a/app/composite/gimp-composite-x86.h +++ b/app/composite/gimp-composite-x86.h @@ -21,10 +21,16 @@ #if __GNUC__ >= 3 +/* + * Convert the low 8bit byte of the src to 16bit words in dst. + */ #define mmx_low_bytes_to_words(src,dst,zero) \ "\tmovq %%"#src", %%"#dst"; " \ "\tpunpcklbw %%"#zero", %%"#dst"\n" +/* + * Convert the high 8bit byte of the src to 16bit words in dst. + */ #define mmx_high_bytes_to_words(src,dst,zero) \ "\tmovq %%"#src", %%"#dst"; " \ "\tpunpckhbw %%"#zero", %%"#dst"\n" @@ -230,5 +236,18 @@ "\tpsrlw $8, %%"#opr2"\n" typedef unsigned long long uint64; - + +extern const guint32 rgba8_alpha_mask_64[2]; +extern const guint32 rgba8_b1_64[2]; +extern const guint32 rgba8_b255_64[2]; +extern const guint32 rgba8_w1_64[2]; +extern const guint32 rgba8_w2_64[2]; +extern const guint32 rgba8_w128_64[2]; +extern const guint32 rgba8_w256_64[2]; +extern const guint32 rgba8_w255_64[2]; + +extern const guint32 va8_alpha_mask[2]; +extern const guint32 va8_b255[2]; +extern const guint32 va8_w1[2]; +extern const guint32 va8_w255[2]; #endif /* __GNUC__ >= 3 */ diff --git a/app/composite/gimp-composite.c b/app/composite/gimp-composite.c index 60885a456a..983df21112 100644 --- a/app/composite/gimp-composite.c +++ b/app/composite/gimp-composite.c @@ -366,9 +366,12 @@ gimp_composite_init (gboolean be_verbose, gimp_composite_options.bits |= GIMP_COMPOSITE_OPTION_NOEXTENSIONS; #ifdef GIMP_UNSTABLE - g_printerr ("gimp_composite: use=%s, verbose=%s\n", - (gimp_composite_options.bits & GIMP_COMPOSITE_OPTION_USE) ? "yes" : "no", - (gimp_composite_options.bits & GIMP_COMPOSITE_OPTION_VERBOSE) ? "yes" : "no"); + if (be_verbose) + { + g_printerr ("gimp_composite: use=%s, verbose=%s\n", + (gimp_composite_options.bits & GIMP_COMPOSITE_OPTION_USE) ? "yes" : "no", + (gimp_composite_options.bits & GIMP_COMPOSITE_OPTION_VERBOSE) ? "yes" : "no"); + } #endif gimp_composite_generic_install (); @@ -400,14 +403,17 @@ gimp_composite_init (gboolean be_verbose, gboolean can_use_vis = gimp_composite_vis_install (); #ifdef GIMP_UNSTABLE - g_printerr ("supported by gimp_composite: " - "%cmmx %csse %csse2 %c3dnow %caltivec %cvis\n", - can_use_mmx ? '+' : '-', - can_use_sse ? '+' : '-', - can_use_sse2 ? '+' : '-', - can_use_3dnow ? '+' : '-', - can_use_altivec ? '+' : '-', - can_use_vis ? '+' : '-'); + if (be_verbose) + { + g_printerr ("Processor instruction sets: " + "%cmmx %csse %csse2 %c3dnow %caltivec %cvis\n", + can_use_mmx ? '+' : '-', + can_use_sse ? '+' : '-', + can_use_sse2 ? '+' : '-', + can_use_3dnow ? '+' : '-', + can_use_altivec ? '+' : '-', + can_use_vis ? 
'+' : '-'); + } #endif } } diff --git a/app/composite/make-installer.py b/app/composite/make-installer.py index 50fd908e55..2942590500 100755 --- a/app/composite/make-installer.py +++ b/app/composite/make-installer.py @@ -471,7 +471,7 @@ op.add_option('-f', '--file', action='store', type='string', dest='file', help='the input object file') op.add_option('-t', '--test', action='store_true', dest='test', default=False, help='generate regression testing code') -op.add_option('-i', '--iterations', action='store', type='int', dest='iterations', default=1, +op.add_option('-i', '--iterations', action='store', type='int', dest='iterations', default=10, help='number of iterations in regression tests') op.add_option('-n', '--n-pixels', action='store', type="int", dest='n_pixels', default=128*8192+16+1, help='number of pixels in each regression test iteration')
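
A note on the guard-macro factoring, for readers of the diff: each
gimp-composite-*.h now makes the "can this be compiled at all?" decision
exactly once, and the implementation file, the generated installer table,
and the generated regression test all key off a single COMPILE_*_IS_OKAY
macro instead of each repeating the full compiler/architecture
conjunction. Condensed from the 3DNow! hunks above (glib types and
base/cpu-accel.h assumed, as in the real files):

/* gimp-composite-3dnow.h: the build decision is made once, here. */
#if !defined(__INTEL_COMPILER)
#if defined(USE_MMX)
#if defined(ARCH_X86)
#if __GNUC__ >= 3
#if defined(ARCH_X86_64) || !defined(PIC)
#define COMPILE_3DNOW_IS_OKAY (1)
#endif
#endif
#endif
#endif
#endif

/* gimp-composite-3dnow.c: the run-time CPU check hides behind the same
 * macro, so the compile-time and run-time conditions cannot drift. */
gboolean
gimp_composite_3dnow_init (void)
{
#ifdef COMPILE_3DNOW_IS_OKAY
  if (cpu_accel () & CPU_ACCEL_X86_3DNOW)
    return (TRUE);
#endif
  return (FALSE);
}

The sources regenerated by make-installer.py then need only
"#if defined(COMPILE_3DNOW_IS_OKAY)", so a change to the build conditions
can no longer leave the three copies disagreeing.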
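The mmx and sse rewrites above also stop copying the whole
GimpCompositeContext and stepping guchar pointers by 8; they walk uint64
pointers instead (one increment = 8 bytes = two rgba8 pixels), and they
replace the old clobber strings "0", "1", "2" (apparently intended to
mark the memory operands as modified) with a genuine "=m" output plus a
list of only the registers the template really touches. A minimal sketch
of the resulting shape, using a hypothetical add_pixels_rgba8 helper;
the alpha-preservation step and the trailing odd-pixel movd case of the
real operators are omitted:

typedef unsigned long long uint64;

static void
add_pixels_rgba8 (uint64 *d, const uint64 *a, const uint64 *b,
                  unsigned long n_pixels)
{
  for (; n_pixels >= 2; n_pixels -= 2)
    {
      asm volatile ("movq    %1, %%mm2\n\t"
                    "movq    %2, %%mm3\n\t"
                    "paddusb %%mm3, %%mm2\n\t"  /* per-byte saturated add  */
                    "movq    %%mm2, %0\n\t"
                    : "=m" (*d)                 /* output: memory written  */
                    : "m" (*a), "m" (*b)        /* inputs: memory read     */
                    : "%mm2", "%mm3");          /* registers actually used */
      a++;
      b++;
      d++;
    }

  asm volatile ("emms");  /* leave MMX state before any FPU code runs */
}

With the destination declared as a true output, GCC can no longer cache
*d across the asm, and the explicit clobber list keeps it from allocating
mm2/mm3 around the statement.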
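Also for reference, the identity behind pmulwX/mmx_int_mult (the comments
above call it the INT_MULT() macro of gimp-composite-generic.c):
multiplying two 8-bit channels that encode fractions of 255 needs a
divide by 255, which the macros compute exactly with adds and shifts.
In plain C (hypothetical helper name):

static unsigned char
int_mult (unsigned char a, unsigned char b)
{
  unsigned int t = a * b + 128;                 /* 128 is the w128 rounding bias   */
  return (unsigned char) ((t + (t >> 8)) >> 8); /* == round(a*b/255.0) for 8-bit inputs */
}

The pmullw / paddw (w128) / psrlw $8 / paddw / psrlw $8 sequences in the
diff compute exactly this, four 16-bit words at a time.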