From d02bc5453b07eb293da5fb88e7b995c4b681e242 Mon Sep 17 00:00:00 2001
From: Helvetix Victorinox
Date: Tue, 20 Jul 2004 06:33:03 +0000
Subject: [PATCH] More clobber register corrections.

---
 ChangeLog                          |   8 ++
 app/composite/gimp-composite-mmx.c | 146 ++++++++++++++++-------------
 app/composite/gimp-composite-x86.h |   1 +
 3 files changed, 89 insertions(+), 66 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 07fd4ed2a5..073671a8a3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2004-07-19  Helvetix Victorinox
+
+        * app/composite/gimp-composite-mmx.c (xxxgimp_composite_dodge_rgba8_rgba8_rgba8_mmx)
+        * app/composite/gimp-composite-mmx.c (xxxgimp_composite_divide_rgba8_rgba8_rgba8_mmx)
+        * app/composite/gimp-composite-mmx.c (gimp_composite_difference_rgba8_rgba8_rgba8_mmx)
+        * app/composite/gimp-composite-mmx.c (gimp_composite_darken_rgba8_rgba8_rgba8_mmx):
+        More clobber register corrections.
+
 2004-07-20  Sven Neumann
 
         * Made 2.1.2 release.
diff --git a/app/composite/gimp-composite-mmx.c b/app/composite/gimp-composite-mmx.c
index 46127a3184..d67d805aa4 100644
--- a/app/composite/gimp-composite-mmx.c
+++ b/app/composite/gimp-composite-mmx.c
@@ -85,13 +85,14 @@ gimp_composite_addition_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
   uint64 *d = (uint64 *) _op->D;
   uint64 *a = (uint64 *) _op->A;
   uint64 *b = (uint64 *) _op->B;
+  gulong n_pixels = _op->n_pixels;
 
   asm volatile ("movq %0,%%mm0"
                 : /* empty */
                 : "m" (*rgba8_alpha_mask)
                 : "%mm0");
 
-  for (; _op->n_pixels >= 2; _op->n_pixels -= 2)
+  for (; n_pixels >= 2; n_pixels -= 2)
     {
       asm volatile ("  movq %1, %%mm2\n"
                     "\tmovq %2, %%mm3\n"
@@ -111,7 +112,7 @@ gimp_composite_addition_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
       d++;
     }
 
-  if (_op->n_pixels)
+  if (n_pixels > 0)
     {
       asm volatile ("  movd %1, %%mm2\n"
                     "\tmovd %2, %%mm3\n"
@@ -137,8 +138,9 @@ gimp_composite_burn_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
   uint64 *d = (uint64 *) _op->D;
   uint64 *a = (uint64 *) _op->A;
   uint64 *b = (uint64 *) _op->B;
+  gulong n_pixels = _op->n_pixels;
 
-  for (; _op->n_pixels >= 2; _op->n_pixels -= 2)
+  for (; n_pixels >= 2; n_pixels -= 2)
     {
       asm volatile ("  movq %1,%%mm0\n"
                     "\tmovq %2,%%mm1\n"
@@ -192,7 +194,7 @@ gimp_composite_burn_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
       a++;
     }
 
-  if (_op->n_pixels)
+  if (n_pixels > 0)
     {
       asm volatile ("  movd %1,%%mm0\n"
                     "\tmovd %2,%%mm1\n"
@@ -250,31 +252,34 @@ gimp_composite_burn_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 void
 gimp_composite_darken_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 {
-  GimpCompositeContext op = *_op;
+  uint64 *d = (uint64 *) _op->D;
+  uint64 *a = (uint64 *) _op->A;
+  uint64 *b = (uint64 *) _op->B;
+  gulong n_pixels = _op->n_pixels;
 
-  for (; op.n_pixels >= 2; op.n_pixels -= 2)
+  for (; n_pixels >= 2; n_pixels -= 2)
     {
       asm volatile ("  movq %1, %%mm2\n"
                     "\tmovq %2, %%mm3\n"
                     "\t" pminub(mm3, mm2, mm4) "\n"
                     "\tmovq %%mm2, %0\n"
-                    : "=m" (*op.D)
-                    : "m" (*op.A), "m" (*op.B)
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b)
                     : "%mm1", "%mm2", "%mm3", "%mm4");
-      op.A += 8;
-      op.B += 8;
-      op.D += 8;
+      a++;
+      b++;
+      d++;
    }
 
-  if (op.n_pixels)
+  if (n_pixels > 0)
    {
-      asm volatile ("  movd %0, %%mm2\n"
-                    "\tmovd %1, %%mm3\n"
+      asm volatile ("  movd %1, %%mm2\n"
+                    "\tmovd %2, %%mm3\n"
                     "\t" pminub(mm3, mm2, mm4) "\n"
-                    "\tmovd %%mm2, %2\n"
-                    : /* empty */
-                    : "m" (*op.A), "m" (*op.B), "m" (*op.D)
-                    : "0", "1", "2", "%mm2", "%mm3", "%mm4");
+                    "\tmovd %%mm2, %0\n"
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b)
+                    : "%mm2", "%mm3", "%mm4");
    }
 
   asm("emms");
@@ -283,14 +288,17 @@ gimp_composite_darken_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 void
 gimp_composite_difference_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 {
-  GimpCompositeContext op = *_op;
+  uint64 *d = (uint64 *) _op->D;
+  uint64 *a = (uint64 *) _op->A;
+  uint64 *b = (uint64 *) _op->B;
+  gulong n_pixels = _op->n_pixels;
 
   asm volatile ("movq %0,%%mm0" : : "m" (*rgba8_alpha_mask) : "%mm0");
 
-  for (; op.n_pixels >= 2; op.n_pixels -= 2)
+  for (; n_pixels >= 2; n_pixels -= 2)
     {
-      asm volatile ("  movq %0, %%mm2\n"
-                    "\tmovq %1, %%mm3\n"
+      asm volatile ("  movq %1, %%mm2\n"
+                    "\tmovq %2, %%mm3\n"
                     "\tmovq %%mm2, %%mm4\n"
                     "\tmovq %%mm3, %%mm5\n"
                     "\tpsubusb %%mm3, %%mm4\n"
@@ -301,19 +309,19 @@ gimp_composite_difference_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
                     "\t" pminub(mm3,mm2,mm4) "\n"
                     "\tpand %%mm0, %%mm2\n"
                     "\tpor %%mm2, %%mm1\n"
-                    "\tmovq %%mm1, %2\n"
-                    : /* empty */
-                    : "m" (*op.A), "m" (*op.B), "m" (*op.D)
-                    : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4");
-      op.A += 8;
-      op.B += 8;
-      op.D += 8;
+                    "\tmovq %%mm1, %0\n"
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b)
+                    : "%mm1", "%mm2", "%mm3", "%mm4");
+      a++;
+      b++;
+      d++;
     }
 
-  if (op.n_pixels)
+  if (n_pixels)
    {
-      asm volatile ("  movd %0, %%mm2\n"
-                    "\tmovd %1, %%mm3\n"
+      asm volatile ("  movd %1, %%mm2\n"
+                    "\tmovd %2, %%mm3\n"
                     "\tmovq %%mm2, %%mm4\n"
                     "\tmovq %%mm3, %%mm5\n"
                     "\tpsubusb %%mm3, %%mm4\n"
@@ -324,10 +332,10 @@ gimp_composite_difference_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
                     "\t" pminub(mm3,mm2,mm4) "\n"
                     "\tpand %%mm0, %%mm2\n"
                     "\tpor %%mm2, %%mm1\n"
-                    "\tmovd %%mm1, %2\n"
-                    : /* empty */
-                    : "m" (*op.A), "m" (*op.B), "m" (*op.D)
-                    : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4");
+                    "\tmovd %%mm1, %0\n"
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b)
+                    : "%mm1", "%mm2", "%mm3", "%mm4");
    }
 
   asm("emms");
@@ -336,7 +344,10 @@ gimp_composite_difference_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 void
 xxxgimp_composite_divide_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 {
-  GimpCompositeContext op = *_op;
+  uint64 *d = (uint64 *) _op->D;
+  uint64 *a = (uint64 *) _op->A;
+  uint64 *b = (uint64 *) _op->B;
+  gulong n_pixels = _op->n_pixels;
 
   asm volatile ("  movq %0, %%mm0\n"
                 "\tmovq %1, %%mm7\n"
@@ -344,10 +355,10 @@ xxxgimp_composite_divide_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
                 : "m" (*rgba8_alpha_mask), "m" (*rgba8_w1)
                 : "%mm0", "%mm7");
 
-  for (; op.n_pixels >= 2; op.n_pixels -= 2)
+  for (; n_pixels >= 2; n_pixels -= 2)
     {
-      asm volatile ("  movq %0,%%mm0\n"
-                    "\tmovq %1,%%mm1\n"
+      asm volatile ("  movq %1,%%mm0\n"
+                    "\tmovq %2,%%mm1\n"
                     "\tpxor %%mm2,%%mm2\n"
                     "\tpunpcklbw %%mm0,%%mm2\n" /* mm2 = A*256 */
 
@@ -379,19 +390,19 @@ xxxgimp_composite_divide_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 
                     "\tpand %%mm2,%%mm1\n"
                     "\tpor %%mm1,%%mm3\n"
-                    "\tmovq %%mm3,%2\n"
-                    : /* empty */
-                    : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_alpha_mask)
-                    : "%eax", "%ecx", "%edx", "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
-      op.A += 8;
-      op.B += 8;
-      op.D += 8;
+                    "\tmovq %%mm3,%0\n"
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b), "m" (*rgba8_alpha_mask)
+                    : "%eax", "%ecx", "%edx", "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+      a++;
+      b++;
+      d++;
     }
 
-  if (op.n_pixels)
+  if (n_pixels)
    {
-      asm volatile ("  movd %0,%%mm0\n"
-                    "\tmovd %1,%%mm1\n"
+      asm volatile ("  movd %1,%%mm0\n"
+                    "\tmovd %2,%%mm1\n"
                     "\tpxor %%mm2,%%mm2\n"
                     "\tpunpcklbw %%mm0,%%mm2\n" /* mm2 = A*256 */
 
@@ -423,9 +434,9 @@ xxxgimp_composite_divide_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 
                     "\tpand %%mm2,%%mm1\n"
                     "\tpor %%mm1,%%mm3\n"
-                    "\tmovd %%mm3,%2\n"
-                    : /* empty */
-                    : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_alpha_mask)
+                    "\tmovd %%mm3,%0\n"
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b), "m" (*rgba8_alpha_mask)
                     : "%eax", "%ecx", "%edx", "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
    }
 
@@ -435,12 +446,15 @@ xxxgimp_composite_divide_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 void
 xxxgimp_composite_dodge_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 {
-  GimpCompositeContext op = *_op;
+  uint64 *d = (uint64 *) _op->D;
+  uint64 *a = (uint64 *) _op->A;
+  uint64 *b = (uint64 *) _op->B;
+  gulong n_pixels = _op->n_pixels;
 
-  for (; op.n_pixels >= 2; op.n_pixels -= 2)
+  for (; n_pixels >= 2; n_pixels -= 2)
     {
-      asm volatile ("  movq %0,%%mm0\n"
-                    "\tmovq %1,%%mm1\n"
+      asm volatile ("  movq %1,%%mm0\n"
+                    "\tmovq %2,%%mm1\n"
                     "\tmovq %%mm1,%%mm3\n"
                     "\tpxor %%mm2,%%mm2\n"
                     "\tpunpcklbw %%mm2,%%mm3\n"
@@ -471,16 +485,16 @@ xxxgimp_composite_dodge_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 
                     "\tpor %%mm6,%%mm7\n"
 
-                    "\tmovq %%mm7,%2\n"
-                    : /* empty */
-                    : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_w256), "m" (*rgba8_alpha_mask)
-                    : "0", "1", "2", "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
-      op.A += 8;
-      op.B += 8;
-      op.D += 8;
+                    "\tmovq %%mm7,%0\n"
+                    : "=m" (*d)
+                    : "m" (*a), "m" (*b), "m" (*rgba8_w256), "m" (*rgba8_alpha_mask)
+                    : pdivwuqX_clobber, "%mm0", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5", "%mm6", "%mm7");
+      a++;
+      b++;
+      d++;
     }
 
-  if (op.n_pixels)
+  if (n_pixels)
     {
       asm volatile ("  movd %0,%%mm0\n"
                     "\tmovq %1,%%mm1\n"
@@ -516,8 +530,8 @@ xxxgimp_composite_dodge_rgba8_rgba8_rgba8_mmx (GimpCompositeContext *_op)
 
                     "\tmovd %%mm7,%2\n"
                     : /* empty */
-                    : "m" (*op.A), "m" (*op.B), "m" (*op.D), "m" (*rgba8_w256), "m" (*rgba8_alpha_mask)
-                    : "0", "1", "2", "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
+                    : "m" (*a), "m" (*b), "m" (*d), "m" (*rgba8_w256), "m" (*rgba8_alpha_mask)
+                    : pdivwuqX_clobber, "%mm1", "%mm2", "%mm3", "%mm4", "%mm5");
     }
 
   asm("emms");
diff --git a/app/composite/gimp-composite-x86.h b/app/composite/gimp-composite-x86.h
index 674796e5b7..aa22be575f 100644
--- a/app/composite/gimp-composite-x86.h
+++ b/app/composite/gimp-composite-x86.h
@@ -117,6 +117,7 @@
                                            "movd %%eax,%%" #divisor ";" \
                                            "psllq $32,%%" #divisor ";" \
                                            "por %%" #divisor ",%%" #quotient ";"
+#define pdivwuqX_clobber pdivwqX_clobber
 
 #define xmm_pdivwqX(dividend,divisor,quotient,scratch) "movd %%" #dividend ",%%eax; " \
                                                        "movd %%" #divisor ",%%ecx; " \
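
Note on the pattern these corrections converge on (not part of the patch itself): the destination becomes a true "=m" output operand, the sources stay "m" inputs, and every register the template writes is declared in the clobber list, so GCC cannot assume a value cached in one of them survives the asm statement. Listing operand numbers such as "0", "1", "2" as clobbers, as the old code did, is no substitute for a real output constraint. A minimal self-contained sketch of the same shape, assuming an x86 target with MMX; the function name and the <stdint.h> types are illustrative only, and the byte-wise minimum mirrors what GIMP's pminub macro expands to:

  #include <stdint.h>

  /* Byte-wise minimum of eight 8-bit values, as in the darken loop:
   * mm4 = max(A - B, 0) via unsigned saturating subtract, then
   * mm2 = A - mm4 = min(A, B). */
  static void
  min_u8x8 (uint64_t *d, const uint64_t *a, const uint64_t *b)
  {
    asm volatile ("  movq    %1, %%mm2\n"    /* mm2 = A */
                  "\tmovq    %2, %%mm3\n"    /* mm3 = B */
                  "\tmovq    %%mm2, %%mm4\n"
                  "\tpsubusb %%mm3, %%mm4\n" /* mm4 = max(A - B, 0) */
                  "\tpsubb   %%mm4, %%mm2\n" /* mm2 = min(A, B) */
                  "\tmovq    %%mm2, %0\n"
                  : "=m" (*d)                /* destination is an output */
                  : "m" (*a), "m" (*b)       /* sources are read-only inputs */
                  : "%mm2", "%mm3", "%mm4"); /* every register the template writes */
    asm volatile ("emms");                   /* leave MMX state before FP code */
  }

Without the "%mm2", "%mm3", "%mm4" clobbers the statement still assembles, but the optimizer remains free to keep live values in those registers across the asm, which is exactly the class of silent corruption this patch stamps out.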